From 23398dce4664be46eb4b295ea2dbd67ba340e9c0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 11:42:24 +0200 Subject: [PATCH 01/40] [susy] regenerate susy_gg_tt and susy_gg_t1t1 with Olivier's PR #850 patch for #825 and #826 --- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 16 +++++++------- .../susy_gg_t1t1.mad/Cards/run_card.dat | 1 - .../Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 ++++--- .../susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h | 10 ++++----- .../CODEGEN_mad_susy_gg_tt_log.txt | 21 +++++++++++-------- .../cudacpp/susy_gg_tt.mad/Cards/run_card.dat | 1 - .../susy_gg_tt.mad/Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 ++++--- .../susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h | 8 +++---- 10 files changed, 37 insertions(+), 36 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 2b28aa829b..0637d12545 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s Wrote files for 16 helas calls in 0.117 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.195 s +ALOHA: aloha creates 3 routines in 0.194 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.192 s +ALOHA: aloha creates 6 routines in 0.191 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.918s -user 0m2.669s -sys 0m0.245s +real 0m3.116s +user 0m2.655s +sys 0m0.262s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index eec3cd72ba..57e8e92627 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! 
floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat index e4a6794d59..d1a260cb8a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h index bafab1ac2e..dc96852e85 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h @@ -947,7 +947,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] 
* ( -cI * TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * TMP3 ) ) ); @@ -979,7 +979,7 @@ namespace mg5amcCpu const fptype_sv P3[4] = { +cxreal( S3[0] ), +cxreal( S3[1] ), +cximag( S3[1] ), +cximag( S3[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - ( *vertex ) = COUP * S2[2] * S3[2] * ( -cI * TMP5 + cI * TMP6 ); + ( *vertex ) = Ccoeff * COUP * S2[2] * S3[2] * ( -cI * TMP5 + cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1009,7 +1009,7 @@ namespace mg5amcCpu const fptype_sv P2[4] = { -cxreal( S2[0] ), -cxreal( S2[1] ), -cximag( S2[1] ), -cximag( S2[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); S2[2] = denom * S3[2] * ( +cI * TMP5 - cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; @@ -1040,7 +1040,7 @@ namespace mg5amcCpu const fptype_sv P3[4] = { -cxreal( S3[0] ), -cxreal( S3[1] ), -cximag( S3[1] ), -cximag( S3[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); S3[2] = denom * S2[2] * ( +cI * TMP5 - cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; @@ -1068,7 +1068,7 @@ 
namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP7 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); - ( *vertex ) = COUP * -cI * TMP7 * S4[2] * S3[2]; + ( *vertex ) = Ccoeff * COUP * -cI * TMP7 * S4[2] * S3[2]; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index bfb96cff2a..bc5212197d 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -59,6 +59,9 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 +INFO: load particles +INFO: load vertices +DEBUG: model prefixing takes 0.9616334438323975  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -554,7 +557,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.114 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +579,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,7 +595,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -602,7 +605,7 @@ ALOHA: aloha creates 2 routines in 0.145 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.142 s VVV1 FFV1 FFV1 @@ -638,10 +641,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.857s -user 0m2.538s -sys 0m0.270s -Code generation completed in 3 seconds +real 0m4.152s +user 0m3.711s +sys 0m0.234s +Code generation completed in 5 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat index eec3cd72ba..57e8e92627 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat index e4a6794d59..d1a260cb8a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h index 45fecfbc22..2519e3902b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h @@ -934,7 +934,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI 
* TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * TMP3 ) ) ); @@ -963,7 +963,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP5 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP5; + ( *vertex ) = Ccoeff * COUP * -cI * TMP5; mgDebug( 1, __FUNCTION__ ); return; } @@ -991,7 +991,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * 
( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1023,7 +1023,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) 
) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); From 6d15bf91be496e9319e7eb333fc5c62a4914f5b7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 11:48:25 +0200 Subject: [PATCH 02/40] [susy] quick tmad test on susy_gg_tt and susy_gg_t1t1: cpp/fortran xsec now match in the former (#825 is fixed), but there is still no xsec in the latter (#826 is not fixed) ./tmad/teeMadX.sh -susyggtt -susyggt1t1 +10x --- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 24 +- .../log_susyggtt_mad_d_inl0_hrd0.txt | 493 +++++++++++++++++- 2 files changed, 489 insertions(+), 28 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 059122dda6..e20853bb7a 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-16_06:00:05 +DATE: 2024-05-30_11:47:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.63E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s + [COUNTERS] Fortran 
MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s - [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4297s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3279s + [COUNTERS] Fortran MEs ( 1 ) : 0.1019s for 90112 events => throughput is 8.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index fd24a61552..7e8cfcc6ca 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-16_05:59:47 +DATE: 2024-05-30_11:46:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s - [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.8344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7899s + [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3838s + [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 1.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s - [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8353s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3481s + [COUNTERS] Fortran MEs ( 1 ) : 0.4872s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -131,12 +131,473 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 - [UNWEIGHT] Wrote 2338 events (found 3965 events) - [COUNTERS] PROGRAM TOTAL : 0.7007s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s + [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4670s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4211s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0459s for 8192 events => throughput is 1.78E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (44.598860065419856) and cpp (171.81273026311101) differ by more than 3E-14 (2.852401832941188) +OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419863) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.8478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3414s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 90112 events => throughput is 1.78E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.821549e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.828539e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.18E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.6042s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2839s for 90112 events => throughput is 3.17E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.236663e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.265774e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3922s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.4898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 90112 events => throughput is 5.03E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.982338e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.050150e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4039s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.4735s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1638s for 90112 events => throughput is 5.50E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.390955e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.551520e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.4224s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3988s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.46E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.5849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2623s for 90112 events => throughput is 3.44E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.534035e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.557058e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 + [UNWEIGHT] Wrote 1603 events (found 1608 events) + [COUNTERS] PROGRAM TOTAL : 0.8612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8606s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.598860065419856) and cuda (44.598860065419849) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 + [UNWEIGHT] Wrote 1743 events (found 1748 events) + [COUNTERS] PROGRAM TOTAL : 1.7451s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.577523870256456) and cuda (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.847950e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.628809e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.649520e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.073840e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.642142e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155791e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.619665e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 
65536 8 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.071359e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED From 6cbb606cd00ed1eca6fb605bd329df8f8d13ef1a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 11:59:30 +0200 Subject: [PATCH 03/40] [susy] regenerate all processes after Olivier's WIP patch in PR #850 - note that HelAmps.h changes in (almost?) all processes! (NB it is HelAmps.h that changes and not Parameters.h as I wrote elsewhere by mistake) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 20 ++-- epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat | 1 - .../ee_mumu.mad/Cards/run_card_default.dat | 1 - .../ee_mumu.mad/bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 12 +-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 12 +-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 20 ++-- epochX/cudacpp/gg_tt.mad/Cards/run_card.dat | 1 - .../gg_tt.mad/Cards/run_card_default.dat | 1 - .../gg_tt.mad/bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 8 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 14 +-- epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h | 8 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 ++--- .../cudacpp/gg_tt01g.mad/Cards/run_card.dat | 1 - .../gg_tt01g.mad/Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h | 18 ++-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++-- epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat | 1 - .../gg_ttg.mad/Cards/run_card_default.dat | 1 - .../gg_ttg.mad/bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h | 18 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +- 
epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h | 18 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 22 ++--- epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat | 1 - .../gg_ttgg.mad/Cards/run_card_default.dat | 1 - .../gg_ttgg.mad/bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h | 24 ++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++-- epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h | 24 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 ++--- .../cudacpp/gg_ttggg.mad/Cards/run_card.dat | 1 - .../gg_ttggg.mad/Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h | 24 ++--- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 ++-- epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h | 24 ++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++-- epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat | 1 - .../gq_ttq.mad/Cards/run_card_default.dat | 1 - .../gq_ttq.mad/bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h | 10 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 16 ++-- epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h | 10 +- .../CODEGEN_mad_heft_gg_bb_log.txt | 14 +-- .../cudacpp/heft_gg_bb.mad/Cards/run_card.dat | 1 - .../heft_gg_bb.mad/Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 +- .../cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h | 12 +-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 8 +- .../cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h | 12 +-- .../CODEGEN_mad_pp_tt012j_log.txt | 94 +++++++++---------- .../cudacpp/pp_tt012j.mad/Cards/run_card.dat | 1 - .../pp_tt012j.mad/Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 +- epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h | 24 ++--- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++-- .../smeft_gg_tttt.mad/Cards/run_card.dat | 1 - .../Cards/run_card_default.dat | 1 - .../bin/internal/launch_plugin.py | 7 +- .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h | 18 ++-- 
.../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 ++-- .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h | 18 ++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 ++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 10 +- .../susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h | 10 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 21 ++--- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 15 ++- .../susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h | 8 +- 72 files changed, 415 insertions(+), 431 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f08289bad0..cdd2c3d44d 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057582855224609375  +DEBUG: model prefixing takes 0.0059506893157958984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.103 s +Wrote files for 8 helas calls in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.209 s +ALOHA: aloha creates 3 routines in 0.213 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.272 s +ALOHA: aloha creates 7 routines in 0.271 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.930s -user 0m1.716s -sys 0m0.204s +real 0m2.071s +user 0m1.736s +sys 0m0.221s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat b/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat index 450acfcfd8..f0c435a829 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat @@ -201,6 +201,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat b/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat index 171578e425..2324df4e21 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat @@ -201,6 +201,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 602510d096..74d9a80c1a 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -978,7 +978,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP0 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP0; + ( *vertex ) = Ccoeff * COUP * -cI * TMP0; mgDebug( 1, __FUNCTION__ ); return; } @@ -1005,7 +1005,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1034,7 +1034,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); - ( *vertex ) = COUP * -cI * TMP1; + ( *vertex ) = Ccoeff * COUP * -cI * TMP1; mgDebug( 1, __FUNCTION__ ); return; } @@ -1063,7 +1063,7 @@ namespace mg5amcCpu V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] - P3[1] * OM3 * TMP2 ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] ) + cI * ( F1[3] * F2[4] ) - P3[2] * OM3 * TMP2 ); @@ -1095,7 +1095,7 @@ namespace mg5amcCpu constexpr fptype two( 2. ); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = COUP * ( -one ) * ( +cI * TMP1 + ( two * cI ) * TMP3 ); + ( *vertex ) = Ccoeff * COUP * ( -one ) * ( +cI * TMP1 + ( two * cI ) * TMP3 ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1127,7 +1127,7 @@ namespace mg5amcCpu constexpr fptype half( 1. / 2. 
); const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -two * cI ) * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ); V3[3] = denom * ( -two * cI ) * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ); V3[4] = denom * ( two * cI ) * ( OM3 * half * P3[2] * ( TMP2 + two * TMP4 ) + ( half * cI * ( F1[2] * F2[5] ) - half * cI * ( F1[3] * F2[4] ) - cI * ( F1[4] * F2[3] ) + cI * ( F1[5] * F2[2] ) ) ); diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index f845f639cc..065a63b45c 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00582575798034668  +DEBUG: model prefixing takes 0.005798816680908203  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.287 s +ALOHA: aloha creates 4 routines in 0.290 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.836s -user 0m0.670s -sys 0m0.059s +real 0m0.703s +user 0m0.648s +sys 0m0.045s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 602510d096..74d9a80c1a 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -978,7 +978,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP0 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP0; + ( *vertex ) = Ccoeff * COUP * -cI * TMP0; mgDebug( 1, __FUNCTION__ ); return; } @@ -1005,7 +1005,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1034,7 +1034,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); - ( *vertex ) = COUP * -cI * TMP1; + ( *vertex ) = Ccoeff * COUP * -cI * TMP1; mgDebug( 1, __FUNCTION__ ); return; } @@ -1063,7 +1063,7 @@ namespace mg5amcCpu V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] - P3[1] * OM3 * TMP2 ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] ) + cI * ( F1[3] * F2[4] ) - P3[2] * OM3 * TMP2 ); @@ -1095,7 +1095,7 @@ namespace mg5amcCpu constexpr fptype two( 2. ); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = COUP * ( -one ) * ( +cI * TMP1 + ( two * cI ) * TMP3 ); + ( *vertex ) = Ccoeff * COUP * ( -one ) * ( +cI * TMP1 + ( two * cI ) * TMP3 ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1127,7 +1127,7 @@ namespace mg5amcCpu constexpr fptype half( 1. / 2. 
); const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -two * cI ) * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ); V3[3] = denom * ( -two * cI ) * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ); V3[4] = denom * ( two * cI ) * ( OM3 * half * P3[2] * ( TMP2 + two * TMP4 ) + ( half * cI * ( F1[2] * F2[5] ) - half * cI * ( F1[3] * F2[4] ) - cI * ( F1[4] * F2[3] ) + cI * ( F1[5] * F2[2] ) ) ); diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index bcd13f1f43..67447d42d3 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005464315414428711  +DEBUG: model prefixing takes 0.005841732025146484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.105 s +Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.155 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.142 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.771s -user 0m1.550s -sys 0m0.221s +real 0m2.079s +user 0m1.557s +sys 0m0.240s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/run_card.dat b/epochX/cudacpp/gg_tt.mad/Cards/run_card.dat index eec3cd72ba..57e8e92627 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/run_card.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_tt.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_tt.mad/Cards/run_card_default.dat index e4a6794d59..d1a260cb8a 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/run_card_default.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 7e276dc57f..1c2d0cd26a 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -934,7 +934,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * 
TMP3 ) ) ); @@ -963,7 +963,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP5 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP5; + ( *vertex ) = Ccoeff * COUP * -cI * TMP5; mgDebug( 1, __FUNCTION__ ); return; } @@ -991,7 +991,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] 
) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1023,7 +1023,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) 
+ F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 74599408a5..ccc526907f 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005803108215332031  +DEBUG: model prefixing takes 0.005822658538818359  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.153 s VVV1 FFV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.703s -user 0m0.492s -sys 0m0.049s -Code generation completed in 0 seconds +real 0m0.642s +user 0m0.515s +sys 0m0.048s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 7e276dc57f..1c2d0cd26a 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -934,7 +934,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * TMP3 ) ) ); @@ -963,7 +963,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP5 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP5; + ( *vertex ) = Ccoeff * COUP * -cI * TMP5; mgDebug( 1, __FUNCTION__ ); return; } @@ -991,7 +991,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( 
-V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1023,7 +1023,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt 
b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index e0bc256894..68c32ffdec 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005784273147583008  +DEBUG: model prefixing takes 0.0058231353759765625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -204,8 +204,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -221,14 +221,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.279 s +Wrote files for 46 helas calls in 0.257 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.348 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.330 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.389s -user 0m2.141s -sys 0m0.224s +real 0m2.440s +user 0m2.187s +sys 0m0.229s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat index 8deeaf24eb..31ed9cdc82 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat @@ -180,6 +180,5 @@ systematics = systematics_program ! 
none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat index d8ad330882..3433f1792c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat @@ -180,6 +180,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! 
floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index b64ded59e1..5742cd4648 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -1003,7 +1003,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1037,7 +1037,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] 
- V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1066,7 +1066,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1094,7 +1094,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1126,7 +1126,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1157,7 +1157,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1192,7 +1192,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP10 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP10 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP10 ) ); @@ -1227,7 +1227,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP11 ) ); @@ -1262,7 +1262,7 @@ namespace mg5amcCpu const 
fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP10 ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP10 ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP10 ) + cI * ( V2[4] * TMP11 ) ); diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index d226034616..469d85f9a8 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005830287933349609  +DEBUG: model prefixing takes 0.00583338737487793  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.156 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s +Wrote files for 36 helas calls in 0.159 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.351 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.338 s VVV1 VVV1 FFV1 @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.256s -user 0m2.049s -sys 0m0.205s +real 0m2.380s +user 0m2.064s +sys 0m0.237s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat index 00b6665519..de3ff3f0bc 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat @@ -162,6 +162,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat index 6b21274400..b9ed04b858 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat @@ -162,6 +162,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index b64ded59e1..5742cd4648 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -1003,7 +1003,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - 
V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1037,7 +1037,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1066,7 +1066,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1094,7 +1094,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( 
-V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1126,7 +1126,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1157,7 +1157,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const 
fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1192,7 +1192,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP10 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP10 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP10 ) ); @@ -1227,7 +1227,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * 
COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP11 ) ); @@ -1262,7 +1262,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP10 ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP10 ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP10 ) + cI * ( V2[4] * TMP11 ) ); diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index e94069458a..982f4861c0 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005761384963989258  +DEBUG: model prefixing takes 0.005800724029541016  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.345 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.797s -user 0m0.737s -sys 0m0.056s +real 0m0.821s +user 0m0.755s +sys 0m0.061s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index b64ded59e1..5742cd4648 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -1003,7 +1003,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1037,7 +1037,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * 
V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1066,7 +1066,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1094,7 +1094,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1126,7 +1126,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1157,7 +1157,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1192,7 +1192,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP10 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP10 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP10 ) ); @@ -1227,7 +1227,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP11 ) ); @@ -1262,7 +1262,7 @@ namespace mg5amcCpu const 
fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP10 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP11 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP10 ) + cI * ( V2[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP10 ) + cI * ( V2[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP10 ) + cI * ( V2[4] * TMP11 ) ); diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 2ce03bcd21..33527b2add 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005746126174926758  +DEBUG: model prefixing takes 0.005804300308227539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.164 s +1 processes with 123 diagrams generated in 0.168 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.452 s -Wrote files for 222 helas calls in 0.720 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.454 s +Wrote files for 222 helas calls in 0.737 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.343 s +ALOHA: aloha creates 5 routines in 0.355 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: aloha creates 10 routines in 0.339 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.664s -user 0m3.149s -sys 0m0.231s +real 0m3.683s +user 0m3.189s +sys 0m0.260s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat index 5ded3fdf05..0fce7ad1d4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat @@ -187,6 +187,5 @@ systematics = systematics_program ! 
none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat index ce692819d0..2daf1af617 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat @@ -187,6 +187,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! 
floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index fac5316937..bcf4333c78 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -1042,7 +1042,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1076,7 +1076,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - 
V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1133,7 +1133,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1165,7 +1165,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1196,7 +1196,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1230,7 +1230,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1261,7 +1261,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP11 ) ); @@ -1295,7 +1295,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * 
TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1326,7 +1326,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP12 ) ); @@ -1360,7 +1360,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1391,7 +1391,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( 
P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP11 ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP11 ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP11 ) + cI * ( V2[4] * TMP12 ) ); diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 88a6d07d2d..f5a1a02359 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005809783935546875  +DEBUG: model prefixing takes 0.0058460235595703125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.165 s +1 processes with 123 diagrams generated in 0.168 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.340 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.630s -user 0m1.428s -sys 0m0.058s -Code generation completed in 2 seconds +real 0m1.535s +user 0m1.452s +sys 0m0.066s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index fac5316937..bcf4333c78 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -1042,7 +1042,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1076,7 +1076,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * 
TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1133,7 +1133,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + 
V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1165,7 +1165,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 
* ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1196,7 +1196,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1230,7 +1230,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1261,7 +1261,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( 
-cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP11 ) ); @@ -1295,7 +1295,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1326,7 +1326,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP12 ) ); @@ -1360,7 +1360,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); 
mgDebug( 1, __FUNCTION__ ); return; } @@ -1391,7 +1391,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP11 ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP11 ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP11 ) + cI * ( V2[4] * TMP12 ) ); diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 2588d0cd2e..9c83cf7313 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005673885345458984  +DEBUG: model prefixing takes 0.00582122802734375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.945 s +1 processes with 1240 diagrams generated in 2.002 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.855 s -Wrote files for 2281 helas calls in 19.401 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.995 s +Wrote files for 2281 helas calls in 19.672 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m30.499s -user 0m29.983s -sys 0m0.419s +real 0m31.033s +user 0m30.516s +sys 0m0.412s Code generation completed in 31 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat index bf717ebbbb..6d135b2f13 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat @@ -193,6 +193,5 @@ systematics = systematics_program ! 
none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat index 2671297a2b..647fc5dd9c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat @@ -193,6 +193,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! 
floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index fac5316937..bcf4333c78 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -1042,7 +1042,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1076,7 +1076,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] 
- V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1133,7 +1133,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1165,7 +1165,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1196,7 +1196,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1230,7 +1230,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1261,7 +1261,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP11 ) ); @@ -1295,7 +1295,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * 
TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1326,7 +1326,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP12 ) ); @@ -1360,7 +1360,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1391,7 +1391,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( 
P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP11 ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP11 ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP11 ) + cI * ( V2[4] * TMP12 ) ); diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 6b5074a2c1..7a363f97bd 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005708456039428711  +DEBUG: model prefixing takes 0.005854606628417969  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.997 s +1 processes with 1240 diagrams generated in 2.003 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.860 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 7.018 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.365 s +ALOHA: aloha creates 5 routines in 0.379 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.606s -user 0m13.435s -sys 0m0.117s -Code generation completed in 14 seconds +real 0m13.875s +user 0m13.703s +sys 0m0.106s +Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index fac5316937..bcf4333c78 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -1042,7 +1042,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1076,7 +1076,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * 
TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1133,7 +1133,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] 
* ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1165,7 +1165,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] 
) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1196,7 +1196,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1230,7 +1230,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1261,7 +1261,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); 
V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * TMP11 ) ); @@ -1295,7 +1295,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1326,7 +1326,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP12 ) ); @@ -1360,7 +1360,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * 
TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1391,7 +1391,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP11 ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP11 ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP11 ) + cI * ( V2[4] * TMP12 ) ); diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 60a7e04bfe..7871ea657d 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005821704864501953  +DEBUG: model prefixing takes 0.005815744400024414  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,8 +217,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,7 +234,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.236 s +Wrote files for 32 helas calls in 0.233 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines @@ -243,7 +243,7 @@ ALOHA: aloha creates 2 routines in 0.156 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.140 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.028s -user 0m1.809s -sys 0m0.218s +real 0m2.029s +user 0m1.803s +sys 0m0.226s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat b/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat index a47052aa69..78b7365b43 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat @@ -164,6 +164,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat b/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat index e0d4a28243..bd13e7fcb1 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat @@ -164,6 +164,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index 7b27c576ea..81f8722a61 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -937,7 +937,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP0 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP0; + ( *vertex ) = Ccoeff * COUP * -cI * TMP0; mgDebug( 1, __FUNCTION__ ); return; } @@ -965,7 +965,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -997,7 +997,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1028,7 +1028,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1068,7 +1068,7 @@ namespace mg5amcCpu const cxtype_sv TMP7 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP8 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP9 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP2 + cI * TMP3 ) + ( TMP5 * ( +cI * TMP4 - cI * TMP6 ) + TMP7 * ( -cI * TMP8 + cI * TMP9 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP2 + cI * TMP3 ) + ( TMP5 * ( +cI * TMP4 - cI * TMP6 ) + TMP7 * ( -cI * TMP8 + cI * TMP9 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index c9b53c9d92..388c1bc14d 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056972503662109375  +DEBUG: model prefixing takes 0.00579524040222168  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,12 +210,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.153 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.671s -user 0m0.612s -sys 0m0.054s -Code generation completed in 0 seconds +real 0m0.690s +user 0m0.619s +sys 0m0.060s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index 7b27c576ea..81f8722a61 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -937,7 +937,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP0 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP0; + ( *vertex ) = Ccoeff * COUP * -cI * TMP0; mgDebug( 1, __FUNCTION__ ); return; } @@ -965,7 +965,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -997,7 +997,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1028,7 +1028,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - 
cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1068,7 +1068,7 @@ namespace mg5amcCpu const cxtype_sv TMP7 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP8 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP9 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP2 + cI * TMP3 ) + ( TMP5 * ( +cI * TMP4 - cI * TMP6 ) + TMP7 * ( -cI * TMP8 + cI * TMP9 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP2 + cI * TMP3 ) + ( TMP5 * ( +cI * TMP4 - cI * TMP6 ) + TMP7 * ( -cI * TMP8 + cI * TMP9 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 703d24d998..11ea151278 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -172,14 +172,14 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.277 s +ALOHA: aloha creates 4 routines in 0.279 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.262 s +ALOHA: aloha creates 8 routines in 0.263 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.061s -user 0m1.786s -sys 0m0.232s +real 0m2.029s +user 0m1.814s +sys 0m0.217s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat index ac654fd4da..1e16203e31 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat @@ -172,6 +172,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat index 31511cf606..ae4845b2fa 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat @@ -172,6 +172,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h index d27655ea8f..210b67369c 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h @@ -958,7 +958,7 @@ namespace mg5amcCpu const cxtype_sv TMP1 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP2 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); const cxtype_sv TMP3 = ( P1[0] * P2[0] - P1[1] * P2[1] - P1[2] * P2[2] - P1[3] * P2[3] ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); S3[2] = denom * ( +cI * ( TMP0 * TMP1 ) - cI * ( TMP2 * TMP3 ) ); mgDebug( 1, __FUNCTION__ ); return; @@ -993,7 +993,7 @@ namespace mg5amcCpu const cxtype_sv TMP5 = ( P2[0] * V3[2] - P2[1] * V3[3] - P2[2] * V3[4] - P2[3] * V3[5] ); const cxtype_sv TMP6 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP7 = ( V2[2] * V3[2] - 
V2[3] * V3[3] - V2[4] * V3[4] - V2[5] * V3[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP7 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP4 + cI * TMP5 ) + V3[2] * ( +cI * TMP0 - cI * TMP6 ) ) ); V1[3] = denom * ( TMP7 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP4 + cI * TMP5 ) + V3[3] * ( +cI * TMP0 - cI * TMP6 ) ) ); V1[4] = denom * ( TMP7 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP4 + cI * TMP5 ) + V3[4] * ( +cI * TMP0 - cI * TMP6 ) ) ); @@ -1022,7 +1022,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP8 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP8; + ( *vertex ) = Ccoeff * COUP * -cI * TMP8; mgDebug( 1, __FUNCTION__ ); return; } @@ -1050,7 +1050,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1082,7 +1082,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1111,7 +1111,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP9 = ( F1[2] * F2[2] + F1[3] * F2[3] + F1[4] * F2[4] + F1[5] * F2[5] ); - ( *vertex ) = COUP * -cI * TMP9 * S3[2]; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9 * S3[2]; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 599dc14f9e..6c41687365 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -66,7 +66,7 @@ INFO: load particles INFO: load vertices WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.0062215328216552734  +DEBUG: model prefixing takes 0.006304025650024414  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -162,7 +162,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.274 s +ALOHA: aloha creates 4 routines in 0.280 s VVS3 VVV1 FFV1 @@ -179,7 +179,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.684s -user 0m0.625s +real 0m0.696s +user 0m0.637s sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h index d27655ea8f..210b67369c 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h @@ -958,7 +958,7 @@ namespace mg5amcCpu const cxtype_sv TMP1 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP2 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); const cxtype_sv TMP3 = ( P1[0] * P2[0] - P1[1] * P2[1] - P1[2] * P2[2] - P1[3] * P2[3] ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); S3[2] = denom * ( +cI * ( TMP0 * TMP1 ) - cI * ( TMP2 * TMP3 ) ); mgDebug( 1, __FUNCTION__ ); return; @@ -993,7 +993,7 @@ namespace mg5amcCpu const cxtype_sv TMP5 = ( P2[0] * V3[2] - P2[1] * V3[3] - P2[2] * V3[4] - P2[3] * V3[5] ); const cxtype_sv TMP6 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP7 = ( V2[2] * V3[2] - V2[3] * V3[3] - V2[4] * V3[4] - V2[5] * V3[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP7 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP4 + cI * TMP5 ) + V3[2] * ( +cI * TMP0 - cI * TMP6 ) ) ); V1[3] = denom * ( TMP7 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP4 + cI * TMP5 ) + V3[3] * ( +cI * TMP0 - cI * TMP6 ) ) ); V1[4] = denom * ( TMP7 * ( -cI * P2[2] + cI * P3[2] ) + ( 
V2[4] * ( -cI * TMP4 + cI * TMP5 ) + V3[4] * ( +cI * TMP0 - cI * TMP6 ) ) ); @@ -1022,7 +1022,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP8 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP8; + ( *vertex ) = Ccoeff * COUP * -cI * TMP8; mgDebug( 1, __FUNCTION__ ); return; } @@ -1050,7 +1050,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( 
F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1082,7 +1082,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( 
V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1111,7 +1111,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * F2[2] + F1[3] * F2[3] + F1[4] * F2[4] + F1[5] * F2[5] ); - ( *vertex ) = COUP * -cI * TMP9 * S3[2]; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9 * S3[2]; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index cc5cbed2bb..70b4bc3317 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005604982376098633  +DEBUG: model prefixing takes 0.005811452865600586  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.142 s +13 processes with 76 diagrams generated in 0.145 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.906 s +65 processes with 1119 diagrams generated in 1.949 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -516,8 +516,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -533,8 +533,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -550,8 +550,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -567,8 +567,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,8 +584,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -601,8 +601,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -618,8 +618,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -635,8 +635,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -652,8 +652,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -669,8 +669,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -686,8 +686,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -703,8 +703,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -720,8 +720,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -737,8 +737,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -754,8 +754,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -771,8 +771,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -788,8 +788,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.352 s -Wrote files for 810 helas calls in 3.380 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.372 s +Wrote files for 810 helas calls in 3.475 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.352 s +ALOHA: aloha creates 5 routines in 0.359 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.708s -user 0m8.687s -sys 0m0.423s -Code generation completed in 10 seconds +real 0m9.378s +user 0m8.878s +sys 0m0.445s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat index 8fe4e23976..110d6b70ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat @@ -205,6 +205,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat index a870466b48..255326259e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat @@ -205,6 +205,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index fac5316937..bcf4333c78 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -1042,7 +1042,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 
= ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1076,7 +1076,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1133,7 +1133,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( 
-V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1165,7 +1165,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1196,7 +1196,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const 
fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1230,7 +1230,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1261,7 +1261,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V3[2] * TMP11 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V3[3] * TMP11 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V3[4] * 
TMP11 ) ); @@ -1295,7 +1295,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1326,7 +1326,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( TMP6 * V4[2] ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( TMP6 * V4[3] ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( TMP6 * V4[4] ) + cI * ( V2[4] * TMP12 ) ); @@ -1360,7 +1360,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1391,7 +1391,7 @@ namespace mg5amcCpu const fptype_sv P1[4] = { -cxreal( V1[0] ), -cxreal( V1[1] ), -cximag( V1[1] ), -cximag( V1[0] ) }; const 
cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( -cI * ( V3[2] * TMP11 ) + cI * ( V2[2] * TMP12 ) ); V1[3] = denom * ( -cI * ( V3[3] * TMP11 ) + cI * ( V2[3] * TMP12 ) ); V1[4] = denom * ( -cI * ( V3[4] * TMP11 ) + cI * ( V2[4] * TMP12 ) ); diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 70288a15e0..fc0dfe1efd 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14343023300170898  +DEBUG: model prefixing takes 0.14805054664611816  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.871 s +1 processes with 72 diagrams generated in 3.964 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,15 +130,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s -Wrote files for 119 helas calls in 0.431 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s +Wrote files for 119 helas calls in 0.442 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.342 s +ALOHA: aloha creates 10 routines in 0.350 s VVV5 VVV5 FFV1 @@ -191,8 +191,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.071s -user 0m6.803s +real 0m7.276s +user 0m6.977s sys 0m0.249s Code generation completed in 7 seconds ************************************************************ diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat index eec3cd72ba..57e8e92627 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat index 3227661d2b..a8be1dfb18 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat @@ -159,6 +159,5 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprece #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - d = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) cpp = cudacpp_backend ! 
CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py index 0eeb31536b..7b4e0b7a30 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py @@ -52,7 +52,6 @@ def compile(self, *args, **opts): """#*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ - %(floating_type)s = floating_type ! floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors) %(cudacpp_backend)s = cudacpp_backend ! CUDACPP backend: fortran, cuda, hip, cpp, cppnone, cppsse4, cppavx2, cpp512y, cpp512z, cppauto """ @@ -86,9 +85,11 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() - self.add_param('floating_type', 'd', include=False, hidden=False, + self.add_param('floating_type', 'm', include=False, hidden=True, fct_mod=(self.reset_makeopts,(),{}), - allowed=['m','d','f']) + allowed=['m','d','f'], + comment='floating point precision: f (single), d (double), m (mixed: double for amplitudes, single for colors)' + ) cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ] self.add_param('cudacpp_backend', 'cpp', include=False, hidden=False, allowed=cudacpp_supported_backends) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h index e567829f1d..b1af58c440 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h @@ -1000,7 
+1000,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1034,7 +1034,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) + V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1063,7 +1063,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. 
); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1091,7 +1091,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( 
-V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1123,7 +1123,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1154,7 +1154,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const 
fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1188,7 +1188,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1218,7 +1218,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1248,7 +1248,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - 
V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index ad5f437053..4689e5272b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14435124397277832  +DEBUG: model prefixing takes 0.14902615547180176  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.831 s +1 processes with 72 diagrams generated in 3.962 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.337 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.274s -user 0m5.174s -sys 0m0.072s -Code generation completed in 6 seconds +real 0m5.440s +user 0m5.326s +sys 0m0.093s +Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h index e567829f1d..b1af58c440 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h @@ -1000,7 +1000,7 @@ namespace mg5amcCpu const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); const cxtype_sv TMP7 = ( V1[2] * P2[0] - V1[3] * P2[1] - V1[4] * P2[2] - V1[5] * P2[3] ); const cxtype_sv TMP8 = ( V1[2] * P3[0] - V1[3] * P3[1] - V1[4] * P3[2] - V1[5] * P3[3] ); - ( *vertex ) = COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); + ( *vertex ) = Ccoeff * COUP * ( TMP1 * ( -cI * TMP0 + cI * TMP2 ) + ( TMP3 * ( +cI * TMP4 - cI * TMP5 ) + TMP6 * ( -cI * TMP7 + cI * TMP8 ) ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1034,7 +1034,7 @@ namespace mg5amcCpu const cxtype_sv TMP4 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP5 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP6 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP2 ) + V3[2] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[3] = denom * ( TMP6 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP2 ) 
+ V3[3] * ( +cI * TMP4 - cI * TMP5 ) ) ); V1[4] = denom * ( TMP6 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP2 ) + V3[4] * ( +cI * TMP4 - cI * TMP5 ) ) ); @@ -1063,7 +1063,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP9 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP9; + ( *vertex ) = Ccoeff * COUP * -cI * TMP9; mgDebug( 1, __FUNCTION__ ); return; } @@ -1091,7 +1091,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 
* ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1123,7 +1123,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * 
V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); @@ -1154,7 +1154,7 @@ namespace mg5amcCpu V3[0] = +F1[0] + F2[0]; V3[1] = +F1[1] + F2[1]; const fptype_sv P3[4] = { -cxreal( V3[0] ), -cxreal( V3[1] ), -cximag( V3[1] ), -cximag( V3[0] ) }; - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); V3[2] = denom * ( -cI ) * ( F1[2] * F2[4] + F1[3] * F2[5] + F1[4] * F2[2] + F1[5] * F2[3] ); V3[3] = denom * ( -cI ) * ( -F1[2] * F2[5] - F1[3] * F2[4] + F1[4] * F2[3] + F1[5] * F2[2] ); V3[4] = denom * ( -cI ) * ( -cI * ( F1[2] * F2[5] + F1[5] * F2[2] ) + cI * ( F1[3] * F2[4] + F1[4] * F2[3] ) ); @@ -1188,7 +1188,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP3 * TMP11 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1218,7 +1218,7 @@ namespace mg5amcCpu const cxtype_sv TMP10 = ( V1[2] * V4[2] - V1[3] * V4[3] - V1[4] * V4[4] - V1[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP6 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP6 * TMP10 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, 
__FUNCTION__ ); return; } @@ -1248,7 +1248,7 @@ namespace mg5amcCpu const cxtype_sv TMP11 = ( V2[2] * V4[2] - V2[3] * V4[3] - V2[4] * V4[4] - V2[5] * V4[5] ); const cxtype_sv TMP12 = ( V3[2] * V4[2] - V3[3] * V4[3] - V3[4] * V4[4] - V3[5] * V4[5] ); const cxtype_sv TMP3 = ( V3[2] * V1[2] - V3[3] * V1[3] - V3[4] * V1[4] - V3[5] * V1[5] ); - ( *vertex ) = COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); + ( *vertex ) = Ccoeff * COUP * ( -cI * ( TMP3 * TMP11 ) + cI * ( TMP1 * TMP12 ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 0637d12545..87d859ed6f 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.131 s +1 processes with 6 diagrams generated in 0.132 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.117 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.194 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.191 s +ALOHA: aloha creates 6 routines in 0.193 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.116s -user 0m2.655s -sys 0m0.262s +real 0m2.942s +user 0m2.689s +sys 0m0.251s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 3531f3a8c6..1f15b7bbe6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.130 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.194 s +ALOHA: aloha creates 3 routines in 0.196 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.427s -user 0m1.344s -sys 0m0.074s +real 0m1.477s +user 0m1.345s +sys 0m0.064s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h index bafab1ac2e..dc96852e85 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h @@ -947,7 +947,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * 
( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * TMP3 ) ) ); @@ -979,7 +979,7 @@ namespace mg5amcCpu const fptype_sv P3[4] = { +cxreal( S3[0] ), +cxreal( S3[1] ), +cximag( S3[1] ), +cximag( S3[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - ( *vertex ) = COUP * S2[2] * S3[2] * ( -cI * TMP5 + cI * TMP6 ); + ( *vertex ) = Ccoeff * COUP * S2[2] * S3[2] * ( -cI * TMP5 + cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1009,7 +1009,7 @@ namespace mg5amcCpu const fptype_sv P2[4] = { -cxreal( S2[0] ), -cxreal( S2[1] ), -cximag( S2[1] ), -cximag( S2[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); S2[2] = denom * S3[2] * ( +cI * TMP5 - cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; @@ -1040,7 +1040,7 @@ namespace mg5amcCpu const fptype_sv P3[4] = { -cxreal( S3[0] ), -cxreal( S3[1] ), -cximag( S3[1] ), -cximag( S3[0] ) }; const cxtype_sv TMP5 = ( P2[0] * V1[2] - P2[1] * V1[3] - P2[2] * V1[4] - P2[3] * V1[5] ); const cxtype_sv TMP6 = ( P3[0] * V1[2] - P3[1] * V1[3] - P3[2] * V1[4] - P3[3] * V1[5] ); - const cxtype_sv denom = COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( 
P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); S3[2] = denom * S2[2] * ( +cI * TMP5 - cI * TMP6 ); mgDebug( 1, __FUNCTION__ ); return; @@ -1068,7 +1068,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP7 = ( V2[2] * V1[2] - V2[3] * V1[3] - V2[4] * V1[4] - V2[5] * V1[5] ); - ( *vertex ) = COUP * -cI * TMP7 * S4[2] * S3[2]; + ( *vertex ) = Ccoeff * COUP * -cI * TMP7 * S4[2] * S3[2]; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index bc5212197d..7ed6df6415 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -59,9 +59,6 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.9616334438323975  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -557,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.114 s +1 processes with 3 diagrams generated in 0.125 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -580,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -595,17 +592,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.146 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.142 s +ALOHA: aloha creates 4 routines in 0.143 s VVV1 FFV1 FFV1 @@ -641,10 +638,10 @@ Type "launch" to generate events 
from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.152s -user 0m3.711s -sys 0m0.234s -Code generation completed in 5 seconds +real 0m2.842s +user 0m2.551s +sys 0m0.249s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 09a04f791a..564ddce6e1 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -59,9 +59,6 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.9576148986816406  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -557,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.117 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -585,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 @@ -600,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m2.489s -user 0m2.409s -sys 0m0.070s -Code generation completed in 3 seconds +real 0m1.359s +user 0m1.300s +sys 0m0.053s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h index 45fecfbc22..2519e3902b 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h @@ -934,7 +934,7 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( P1[0] * V2[2] - P1[1] * V2[3] - P1[2] * V2[4] - P1[3] * V2[5] ); const cxtype_sv TMP3 = ( V2[2] * P3[0] - V2[3] * P3[1] - V2[4] * P3[2] - V2[5] * P3[3] ); const cxtype_sv TMP4 = ( V3[2] * V2[2] - V3[3] * V2[3] - V3[4] * V2[4] - V3[5] * V2[5] ); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] 
) - M1 * ( M1 - cI * W1 ) ); V1[2] = denom * ( TMP4 * ( -cI * P2[0] + cI * P3[0] ) + ( V2[2] * ( -cI * TMP0 + cI * TMP1 ) + V3[2] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[3] = denom * ( TMP4 * ( -cI * P2[1] + cI * P3[1] ) + ( V2[3] * ( -cI * TMP0 + cI * TMP1 ) + V3[3] * ( +cI * TMP2 - cI * TMP3 ) ) ); V1[4] = denom * ( TMP4 * ( -cI * P2[2] + cI * P3[2] ) + ( V2[4] * ( -cI * TMP0 + cI * TMP1 ) + V3[4] * ( +cI * TMP2 - cI * TMP3 ) ) ); @@ -963,7 +963,7 @@ namespace mg5amcCpu cxtype_sv* vertex = A_ACCESS::kernelAccess( allvertexes ); const cxtype cI = cxmake( 0., 1. ); const cxtype_sv TMP5 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + ( F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) + ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ) ) ); - ( *vertex ) = COUP * -cI * TMP5; + ( *vertex ) = Ccoeff * COUP * -cI * TMP5; mgDebug( 1, __FUNCTION__ ); return; } @@ -991,7 +991,7 @@ namespace mg5amcCpu F1[1] = +F2[1] + V3[1]; const fptype_sv P1[4] = { -cxreal( F1[0] ), -cxreal( F1[1] ), -cximag( F1[1] ), -cximag( F1[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P1[0] * P1[0] ) - ( P1[1] * P1[1] ) - ( P1[2] * P1[2] ) - ( P1[3] * P1[3] ) - M1 * ( M1 - cI * W1 ) ); F1[2] = denom * cI * ( F2[2] * ( P1[0] * ( -V3[2] + V3[5] ) + ( P1[1] * ( V3[3] - cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] + V3[4] ) + P1[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P1[2] * ( -one ) * ( +cI * ( V3[2] + V3[5] ) ) + P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) ) ); F1[3] = denom * ( -cI ) * ( F2[2] * ( P1[0] * ( -V3[3] + cI * V3[4] ) + ( P1[1] * ( V3[2] - V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) + P1[3] * ( V3[3] - cI * V3[4] ) ) ) ) + ( F2[3] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P1[2] * ( +cI * V3[3] - V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + M1 * ( F2[4] * ( -V3[3] + cI * V3[4] ) + F2[5] * ( -V3[2] + V3[5] ) ) ) ); F1[4] = denom * ( -cI ) * ( F2[4] * ( P1[0] * ( V3[2] + V3[5] ) + ( P1[1] * ( -V3[3] + cI * V3[4] ) + ( P1[2] * ( -one ) * ( +cI * V3[3] + V3[4] ) - P1[3] * ( V3[2] + V3[5] ) ) ) ) + ( F2[5] * ( P1[0] * ( V3[3] + cI * V3[4] ) + ( P1[1] * ( -V3[2] + V3[5] ) + ( P1[2] * ( -cI * V3[2] + cI * V3[5] ) - P1[3] * ( V3[3] + cI * V3[4] ) ) ) ) + M1 * ( F2[2] * ( -V3[2] + V3[5] ) + F2[3] * ( V3[3] + cI * V3[4] ) ) ) ); @@ -1023,7 +1023,7 @@ namespace mg5amcCpu F2[1] = +F1[1] + V3[1]; const fptype_sv P2[4] = { -cxreal( F2[0] ), -cxreal( F2[1] ), -cximag( F2[1] ), -cximag( F2[0] ) }; constexpr fptype one( 1. 
); - const cxtype_sv denom = COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); + const cxtype_sv denom = Ccoeff * COUP / ( ( P2[0] * P2[0] ) - ( P2[1] * P2[1] ) - ( P2[2] * P2[2] ) - ( P2[3] * P2[3] ) - M2 * ( M2 - cI * W2 ) ); F2[2] = denom * cI * ( F1[2] * ( P2[0] * ( V3[2] + V3[5] ) + ( P2[1] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] - V3[4] ) - P2[3] * ( V3[2] + V3[5] ) ) ) ) + ( F1[3] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -V3[2] + V3[5] ) + ( P2[2] * ( +cI * V3[2] - cI * V3[5] ) + P2[3] * ( -V3[3] + cI * V3[4] ) ) ) ) + M2 * ( F1[4] * ( V3[2] - V3[5] ) + F1[5] * ( -V3[3] + cI * V3[4] ) ) ) ); F2[3] = denom * ( -cI ) * ( F1[2] * ( P2[0] * ( -one ) * ( V3[3] + cI * V3[4] ) + ( P2[1] * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) - P2[3] * ( V3[3] + cI * V3[4] ) ) ) ) + ( F1[3] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] - cI * V3[4] ) + ( P2[2] * ( +cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + M2 * ( F1[4] * ( V3[3] + cI * V3[4] ) - F1[5] * ( V3[2] + V3[5] ) ) ) ); F2[4] = denom * ( -cI ) * ( F1[4] * ( P2[0] * ( -V3[2] + V3[5] ) + ( P2[1] * ( V3[3] + cI * V3[4] ) + ( P2[2] * ( -cI * V3[3] + V3[4] ) + P2[3] * ( -V3[2] + V3[5] ) ) ) ) + ( F1[5] * ( P2[0] * ( V3[3] - cI * V3[4] ) + ( P2[1] * ( -one ) * ( V3[2] + V3[5] ) + ( P2[2] * ( +cI * ( V3[2] + V3[5] ) ) + P2[3] * ( V3[3] - cI * V3[4] ) ) ) ) + M2 * ( F1[2] * ( -one ) * ( V3[2] + V3[5] ) + F1[3] * ( -V3[3] + cI * V3[4] ) ) ) ); From 11d8b38d18f8ec7c3cdd471814cb5be009193ce9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 14:54:54 +0200 Subject: [PATCH 04/40] [susy] go back to upstream/master CODEGEN logs to allow the merge with launch PR #851 git checkout upstream/master $(git ls-tree --name-only HEAD */CODE*txt) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 20 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 20 ++-- 
.../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 14 +-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 ++--- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 22 ++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 ++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 ++--- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 16 ++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 14 +-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 8 +- .../CODEGEN_mad_pp_tt012j_log.txt | 94 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 16 ++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 ++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 10 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 16 ++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 15 +-- 22 files changed, 221 insertions(+), 218 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index cdd2c3d44d..f08289bad0 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0059506893157958984  +DEBUG: model prefixing takes 0.0057582855224609375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.105 s +Wrote files for 8 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.213 s +ALOHA: aloha creates 3 routines in 0.209 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.271 s +ALOHA: aloha creates 7 routines in 0.272 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.071s -user 0m1.736s -sys 0m0.221s +real 0m1.930s +user 0m1.716s +sys 0m0.204s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 065a63b45c..f845f639cc 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005798816680908203  +DEBUG: model prefixing takes 0.00582575798034668  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.290 s +ALOHA: aloha creates 4 routines in 0.287 s FFV1 FFV1 FFV2 @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.703s -user 0m0.648s -sys 0m0.045s +real 0m0.836s +user 0m0.670s +sys 0m0.059s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 67447d42d3..bcd13f1f43 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005841732025146484  +DEBUG: model prefixing takes 0.005464315414428711  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.108 s +Wrote files for 10 helas calls in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.155 s +ALOHA: aloha creates 2 routines in 0.153 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.142 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.079s -user 0m1.557s -sys 0m0.240s +real 0m1.771s +user 0m1.550s +sys 0m0.221s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index ccc526907f..74599408a5 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005822658538818359  +DEBUG: model prefixing takes 0.005803108215332031  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.149 s VVV1 FFV1 FFV1 @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.642s -user 0m0.515s -sys 0m0.048s -Code generation completed in 1 seconds +real 0m0.703s +user 0m0.492s +sys 0m0.049s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 68c32ffdec..e0bc256894 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058231353759765625  +DEBUG: model prefixing takes 0.005784273147583008  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -204,8 +204,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -221,14 +221,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.257 s +Wrote files for 46 helas calls in 0.279 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.348 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.330 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.440s -user 0m2.187s -sys 0m0.229s +real 0m2.389s +user 0m2.141s +sys 0m0.224s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 469d85f9a8..d226034616 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00583338737487793  +DEBUG: model prefixing takes 0.005830287933349609  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.159 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.156 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.351 s +ALOHA: aloha creates 5 routines in 0.344 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.338 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.380s -user 0m2.064s -sys 0m0.237s +real 0m2.256s +user 0m2.049s +sys 0m0.205s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 982f4861c0..e94069458a 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005800724029541016  +DEBUG: model prefixing takes 0.005761384963989258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.336 s VVV1 VVV1 FFV1 @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.821s -user 0m0.755s -sys 0m0.061s +real 0m0.797s +user 0m0.737s +sys 0m0.056s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 33527b2add..2ce03bcd21 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005804300308227539  +DEBUG: model prefixing takes 0.005746126174926758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.168 s +1 processes with 123 diagrams generated in 0.164 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.454 s -Wrote files for 222 helas calls in 0.737 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.452 s +Wrote files for 222 helas calls in 0.720 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.355 s +ALOHA: aloha creates 5 routines in 0.343 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.339 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.683s -user 0m3.189s -sys 0m0.260s +real 0m3.664s +user 0m3.149s +sys 0m0.231s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index f5a1a02359..88a6d07d2d 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058460235595703125  +DEBUG: model prefixing takes 0.005809783935546875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.168 s +1 processes with 123 diagrams generated in 0.165 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.453 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.333 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.535s -user 0m1.452s -sys 0m0.066s -Code generation completed in 1 seconds +real 0m1.630s +user 0m1.428s +sys 0m0.058s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 9c83cf7313..2588d0cd2e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00582122802734375  +DEBUG: model prefixing takes 0.005673885345458984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.002 s +1 processes with 1240 diagrams generated in 1.945 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.995 s -Wrote files for 2281 helas calls in 19.672 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.855 s +Wrote files for 2281 helas calls in 19.401 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m31.033s -user 0m30.516s -sys 0m0.412s +real 0m30.499s +user 0m29.983s +sys 0m0.419s Code generation completed in 31 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 7a363f97bd..6b5074a2c1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005854606628417969  +DEBUG: model prefixing takes 0.005708456039428711  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.003 s +1 processes with 1240 diagrams generated in 1.997 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 7.018 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.860 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.379 s +ALOHA: aloha creates 5 routines in 0.365 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.875s -user 0m13.703s -sys 0m0.106s -Code generation completed in 13 seconds +real 0m13.606s +user 0m13.435s +sys 0m0.117s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 7871ea657d..60a7e04bfe 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005815744400024414  +DEBUG: model prefixing takes 0.005821704864501953  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,8 +217,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,7 +234,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.233 s +Wrote files for 32 helas calls in 0.236 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines @@ -243,7 +243,7 @@ ALOHA: aloha creates 2 routines in 0.156 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.141 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.029s -user 0m1.803s -sys 0m0.226s +real 0m2.028s +user 0m1.809s +sys 0m0.218s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 388c1bc14d..c9b53c9d92 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00579524040222168  +DEBUG: model prefixing takes 0.0056972503662109375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,12 +210,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.690s -user 0m0.619s -sys 0m0.060s -Code generation completed in 1 seconds +real 0m0.671s +user 0m0.612s +sys 0m0.054s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 11ea151278..703d24d998 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -172,14 +172,14 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.279 s +ALOHA: aloha creates 4 routines in 0.277 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.263 s +ALOHA: aloha creates 8 routines in 0.262 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.029s -user 0m1.814s -sys 0m0.217s +real 0m2.061s +user 0m1.786s +sys 0m0.232s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 6c41687365..599dc14f9e 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -66,7 +66,7 @@ INFO: load particles INFO: load vertices WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.006304025650024414  +DEBUG: model prefixing takes 0.0062215328216552734  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -162,7 +162,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.280 s +ALOHA: aloha creates 4 routines in 0.274 s VVS3 VVV1 FFV1 @@ -179,7 +179,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.696s -user 0m0.637s +real 0m0.684s +user 0m0.625s sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 70b4bc3317..cc5cbed2bb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005811452865600586  +DEBUG: model prefixing takes 0.005604982376098633  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.145 s +13 processes with 76 diagrams generated in 0.142 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.949 s +65 processes with 1119 diagrams generated in 1.906 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -516,8 +516,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -533,8 +533,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -550,8 +550,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -567,8 +567,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,8 +584,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -601,8 +601,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -618,8 +618,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -635,8 +635,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -652,8 +652,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -669,8 +669,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -686,8 +686,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -703,8 +703,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -720,8 +720,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -737,8 +737,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -754,8 +754,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -771,8 +771,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -788,8 +788,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.372 s -Wrote files for 810 helas calls in 3.475 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.352 s +Wrote files for 810 helas calls in 3.380 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.359 s +ALOHA: aloha creates 5 routines in 0.352 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.378s -user 0m8.878s -sys 0m0.445s -Code generation completed in 9 seconds +real 0m9.708s +user 0m8.687s +sys 0m0.423s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index fc0dfe1efd..70288a15e0 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14805054664611816  +DEBUG: model prefixing takes 0.14343023300170898  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.964 s +1 processes with 72 diagrams generated in 3.871 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -130,15 +130,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s -Wrote files for 119 helas calls in 0.442 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +Wrote files for 119 helas calls in 0.431 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.328 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.350 s +ALOHA: aloha creates 10 routines in 0.342 s VVV5 VVV5 FFV1 @@ -191,8 +191,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.276s -user 0m6.977s +real 0m7.071s +user 0m6.803s sys 0m0.249s Code generation completed in 7 seconds ************************************************************ diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 4689e5272b..ad5f437053 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14902615547180176  +DEBUG: model prefixing takes 0.14435124397277832  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.962 s +1 processes with 72 diagrams generated in 3.831 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.327 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.440s -user 0m5.326s -sys 0m0.093s -Code generation completed in 5 seconds +real 0m5.274s +user 0m5.174s +sys 0m0.072s +Code generation completed in 6 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 87d859ed6f..2b28aa829b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.132 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,18 +593,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.118 s +Wrote files for 16 helas calls in 0.117 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.193 s +ALOHA: aloha creates 6 routines in 0.192 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.942s -user 0m2.689s -sys 0m0.251s +real 0m2.918s +user 0m2.669s +sys 0m0.245s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 1f15b7bbe6..3531f3a8c6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.131 s +1 processes with 6 diagrams generated in 0.130 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.194 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.477s -user 0m1.345s -sys 0m0.064s +real 0m1.427s +user 0m1.344s +sys 0m0.074s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 7ed6df6415..bfb96cff2a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.125 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,12 +597,12 @@ Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.146 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.143 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.842s -user 0m2.551s -sys 0m0.249s +real 0m2.857s +user 0m2.538s +sys 0m0.270s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 564ddce6e1..09a04f791a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -59,6 +59,9 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 +INFO: load particles +INFO: load vertices +DEBUG: model prefixing takes 0.9576148986816406  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -554,7 +557,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.117 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +585,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.146 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -597,7 +600,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.359s -user 0m1.300s -sys 0m0.053s -Code generation completed in 2 seconds +real 0m2.489s +user 0m2.409s +sys 0m0.070s +Code generation completed in 3 seconds From 37f86317fba7a283c281a1b8440568e25ff6a585 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 15:00:57 +0200 Subject: [PATCH 05/40] [susy] regenerate all processes with both PR #835 and PR #851 patches - only codelogs change anyway --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 18 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 20 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 ++--- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 24 ++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 24 ++--- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 24 ++--- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +-- .../CODEGEN_mad_heft_gg_bb_log.txt | 16 ++-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 10 +- .../CODEGEN_mad_pp_tt012j_log.txt | 94 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 +-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 18 ++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 8 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 16 ++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 10 +- 22 files changed, 214 insertions(+), 214 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 01558e97fa..91519bd199 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles 
INFO: load vertices -DEBUG: model prefixing takes 0.005818367004394531  +DEBUG: model prefixing takes 0.0058062076568603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.104 s +Wrote files for 8 helas calls in 0.105 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.213 s +ALOHA: aloha creates 3 routines in 0.212 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.280 s +ALOHA: aloha creates 7 routines in 0.270 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run 
"open index.html" to see more information about this process. quit -real 0m1.985s -user 0m1.747s -sys 0m0.212s +real 0m2.629s +user 0m1.753s +sys 0m0.211s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index b0fc131d4f..cfbf379d71 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005814552307128906  +DEBUG: model prefixing takes 0.005816221237182617  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.689s -user 0m0.639s -sys 0m0.045s -Code generation completed in 1 seconds +real 0m0.775s +user 0m0.647s +sys 0m0.057s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 4da158143b..af7830f01e 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058248043060302734  +DEBUG: model prefixing takes 0.005688667297363281  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.108 s +Wrote files for 10 helas calls in 0.109 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.156 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.142 s VVV1 FFV1 FFV1 @@ -239,10 +239,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.797s -user 0m1.551s -sys 0m0.228s -Code generation completed in 1 seconds +real 0m1.902s +user 0m1.560s +sys 0m0.227s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index a4dc00f87b..176649bab7 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005820274353027344  +DEBUG: model prefixing takes 0.005812644958496094  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.635s -user 0m0.505s -sys 0m0.048s -Code generation completed in 0 seconds +real 0m0.561s +user 0m0.496s +sys 0m0.060s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b3f5ed282a..8c7deb3d21 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005791902542114258  +DEBUG: model prefixing takes 0.0058057308197021484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -204,8 +204,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -221,14 +221,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.259 s +Wrote files for 46 helas calls in 0.256 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.348 s +ALOHA: aloha creates 5 routines in 0.347 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.330 s +ALOHA: aloha creates 10 routines in 0.340 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.666s -user 0m2.189s -sys 0m0.225s +real 0m2.426s +user 0m2.178s +sys 0m0.223s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index f2938af2d2..b8b101f03d 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005806684494018555  +DEBUG: model prefixing takes 0.005696535110473633  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,14 +194,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.157 s +Wrote files for 36 helas calls in 0.158 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.342 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.331 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -254,10 +254,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.562s -user 0m2.060s -sys 0m0.221s -Code generation completed in 3 seconds +real 0m2.354s +user 0m2.039s +sys 0m0.226s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 2650bec87e..c7633aaea7 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005831718444824219  +DEBUG: model prefixing takes 0.005728721618652344  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.852s -user 0m0.759s -sys 0m0.056s -Code generation completed in 1 seconds +real 0m0.824s +user 0m0.772s +sys 0m0.042s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index d479b476a0..9d19df8188 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00582575798034668  +DEBUG: model prefixing takes 0.005671262741088867  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.169 s +1 processes with 123 diagrams generated in 0.166 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.460 s -Wrote files for 222 helas calls in 0.741 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.447 s +Wrote files for 222 helas calls in 0.745 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.355 s +ALOHA: aloha creates 5 routines in 0.353 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.338 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -257,10 +257,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.505s -user 0m3.236s -sys 0m0.224s -Code generation completed in 3 seconds +real 0m3.779s +user 0m3.182s +sys 0m0.250s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 89a9e25c18..f9ef67bae5 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057909488677978516  +DEBUG: model prefixing takes 0.0055997371673583984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.170 s +1 processes with 123 diagrams generated in 0.171 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.451 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.450 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.544s -user 0m1.463s -sys 0m0.054s -Code generation completed in 1 seconds +real 0m1.525s +user 0m1.447s +sys 0m0.066s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index da6acfa84a..bcecc00009 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005862236022949219  +DEBUG: model prefixing takes 0.005766391754150391  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.007 s +1 processes with 1240 diagrams generated in 1.986 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 7.000 s -Wrote files for 2281 helas calls in 19.666 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.931 s +Wrote files for 2281 helas calls in 19.490 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.336 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.330 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -259,10 +259,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m31.035s -user 0m30.488s -sys 0m0.422s -Code generation completed in 32 seconds +real 0m30.786s +user 0m30.251s +sys 0m0.400s +Code generation completed in 30 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index b0d9872611..4e2d7c04bd 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005834102630615234  +DEBUG: model prefixing takes 0.0056269168853759766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 2.007 s +1 processes with 1240 diagrams generated in 1.991 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 7.027 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.900 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.374 s +ALOHA: aloha creates 5 routines in 0.376 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.929s -user 0m13.764s -sys 0m0.108s +real 0m13.707s +user 0m13.535s +sys 0m0.118s Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index c685148505..db4d1b2f5b 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005826234817504883  +DEBUG: model prefixing takes 0.005600929260253906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,8 +217,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,16 +234,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.234 s +Wrote files for 32 helas calls in 0.231 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.153 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.139 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.041s -user 0m1.799s -sys 0m0.227s +real 0m2.230s +user 0m1.771s +sys 0m0.236s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 497eedfefd..8d830a08ba 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00579071044921875  +DEBUG: model prefixing takes 0.005650997161865234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.082 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -215,7 +215,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.690s -user 0m0.614s -sys 0m0.065s +real 0m0.680s +user 0m0.624s +sys 0m0.051s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 534d00c329..99374cf5f8 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.111 s +Wrote files for 12 helas calls in 0.109 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.280 s +ALOHA: aloha creates 4 routines in 0.276 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.264 s +ALOHA: aloha creates 8 routines in 0.474 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.047s -user 0m1.806s -sys 0m0.230s +real 0m2.489s +user 0m1.764s +sys 0m0.253s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 8eeb803fc7..bb27075562 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -157,7 +157,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.277 s +ALOHA: aloha creates 4 routines in 0.283 s VVS3 VVV1 FFV1 @@ -174,7 +174,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.723s -user 0m0.625s -sys 0m0.046s -Code generation completed in 1 seconds +real 0m0.677s +user 0m0.600s +sys 0m0.062s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index b26d47fafc..beb9862c99 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005836009979248047  +DEBUG: model prefixing takes 0.0058329105377197266  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.146 s +13 processes with 76 diagrams generated in 0.144 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.970 s +65 processes with 1119 diagrams generated in 1.936 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -516,8 +516,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -533,8 +533,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -550,8 +550,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -567,8 +567,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,8 +584,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -601,8 +601,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -618,8 +618,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -635,8 +635,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -652,8 +652,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -669,8 +669,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -686,8 +686,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -703,8 +703,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -720,8 +720,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -737,8 +737,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -754,8 +754,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -771,8 +771,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -788,8 +788,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.375 s -Wrote files for 810 helas calls in 3.466 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.367 s +Wrote files for 810 helas calls in 3.438 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.357 s +ALOHA: aloha creates 5 routines in 0.356 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.333 s +ALOHA: aloha creates 10 routines in 0.331 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.404s -user 0m8.903s -sys 0m0.418s -Code generation completed in 10 seconds +real 0m9.387s +user 0m8.804s +sys 0m0.434s +Code generation completed in 9 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index e171469df6..422db5816a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14738821983337402  +DEBUG: model prefixing takes 0.1452183723449707  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.935 s +1 processes with 72 diagrams generated in 3.917 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -131,14 +131,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s -Wrote files for 119 helas calls in 0.442 s +Wrote files for 119 helas calls in 0.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.333 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.355 s +ALOHA: aloha creates 10 routines in 0.350 s VVV5 VVV5 FFV1 @@ -191,9 +191,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.305s -user 0m6.946s -sys 0m0.253s +real 0m7.180s +user 0m6.909s +sys 0m0.243s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index afe7467840..863794fe85 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14697813987731934  +DEBUG: model prefixing takes 0.14494752883911133  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.942 s +1 processes with 72 diagrams generated in 3.880 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.332 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.448s -user 0m5.330s -sys 0m0.058s +real 0m5.347s +user 0m5.256s +sys 0m0.063s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 78d37d6c49..42e364fe16 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.130 s +1 processes with 6 diagrams generated in 0.132 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -593,18 +593,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.117 s +Wrote files for 16 helas calls in 0.116 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.198 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.201 s +ALOHA: aloha creates 6 routines in 0.192 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.081s -user 0m2.681s -sys 0m0.245s +real 0m3.408s +user 0m2.655s +sys 0m0.256s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 4953f08208..18293f5350 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.131 s +1 processes with 6 diagrams generated in 0.133 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.193 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.421s -user 0m1.348s +real 0m1.554s +user 0m1.342s sys 0m0.062s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 2fb4d8a715..c6fc0cbca8 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.124 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,12 +597,12 @@ Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.140 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.805s -user 0m2.548s -sys 0m0.244s +real 0m2.972s +user 0m2.522s +sys 0m0.256s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index a6c54f90b2..57ce689dad 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.125 s +1 processes with 3 diagrams generated in 0.124 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.144 s VVV1 FFV1 FFV1 @@ -597,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.365s -user 0m1.280s -sys 0m0.067s +real 0m1.489s +user 0m1.276s +sys 0m0.066s Code generation completed in 1 seconds From b01019813a31006bf4e73cbcd3addc96f00eeb14 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 16:17:11 +0200 Subject: [PATCH 06/40] [susy] in susy_gg_t1t1.mad make_opts, try to add -fno-tree-vectorize to global flags to fix a SIGFPE in Fortran rotxx (see #826) --- epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts index e4b87ee6ad..8b9f53569b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts @@ -1,7 +1,7 @@ DEFAULT_CPP_COMPILER=g++ DEFAULT_F2PY_COMPILER=f2py3 DEFAULT_F_COMPILER=gfortran -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -fno-tree-vectorize MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled From 6d3ebbc942557891520cf47e28d4b1e6fc74bd14 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 16:21:22 +0200 Subject: [PATCH 07/40] [susy] in susy_gg_t1t1.mad runcard, try to add -fno-tree-vectorize to global flags to fix a SIGFPE in Fortran rotxx (see #826) (NB: if you run bin/generate_events, the runcard value supersedes that in make_opts!) --- epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index 57e8e92627..ff4802ae25 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -92,7 +92,7 @@ #********************************************************************* # Compilation flag. 
#********************************************************************* - -O3 -ffast-math -fbounds-check = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) + -O3 -ffast-math -fbounds-check -fno-tree-vectorize = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) --fast-math = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' -O3 = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' 16384 = vector_size ! size of fortran arrays allocated in the multi-event API for SIMD/GPU (VECSIZE_MEMMAX) From 40ef9530894e5db37e6d8fbf398cda14879a4f33 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 16:45:16 +0200 Subject: [PATCH 08/40] [susy] in susy_gg_t1t1.mad runcard and make_opts, try to change -O3 to -O2 in global_flags to fix all SIGFPEs in Frtran (#826) --- epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat | 2 +- epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index ff4802ae25..40452fb385 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -92,7 +92,7 @@ #********************************************************************* # Compilation flag. #********************************************************************* - -O3 -ffast-math -fbounds-check -fno-tree-vectorize = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) + -O2 -ffast-math -fbounds-check -fno-tree-vectorize = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) --fast-math = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' -O3 = matrix_flag ! 
fortran optimization flag for matrix.f function. Suggestions: '-O3' 16384 = vector_size ! size of fortran arrays allocated in the multi-event API for SIMD/GPU (VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts index 8b9f53569b..91c15a792d 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts @@ -1,7 +1,7 @@ DEFAULT_CPP_COMPILER=g++ DEFAULT_F2PY_COMPILER=f2py3 DEFAULT_F_COMPILER=gfortran -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -fno-tree-vectorize +GLOBAL_FLAG=-O2 -ffast-math -fbounds-check -fno-tree-vectorize MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled From dd4df95d98c42cd9bbc9cc11fbad4396893b2464 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 16:49:12 +0200 Subject: [PATCH 09/40] [susy] in susy_gg_t1t1.mad runcard and make_opts, add -g in global_flags to debug SIGFPEs in Frtran (#826) --- epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat | 2 +- epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index 40452fb385..010d2376bf 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -92,7 +92,7 @@ #********************************************************************* # Compilation flag. #********************************************************************* - -O2 -ffast-math -fbounds-check -fno-tree-vectorize = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) + -O2 -ffast-math -fbounds-check -fno-tree-vectorize -g = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) --fast-math = aloha_flag ! fortran optimization flag for aloha function. 
Suggestions: '-ffast-math' -O3 = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' 16384 = vector_size ! size of fortran arrays allocated in the multi-event API for SIMD/GPU (VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts index 91c15a792d..ed8e179af2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts @@ -1,7 +1,7 @@ DEFAULT_CPP_COMPILER=g++ DEFAULT_F2PY_COMPILER=f2py3 DEFAULT_F_COMPILER=gfortran -GLOBAL_FLAG=-O2 -ffast-math -fbounds-check -fno-tree-vectorize +GLOBAL_FLAG=-O2 -ffast-math -fbounds-check -fno-tree-vectorize -g MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled From 7b160e8f342c9dc5eb9b3c0d0719c417dbc44436 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 30 May 2024 16:52:45 +0200 Subject: [PATCH 10/40] [susy] in susy_gg_t1t1.mad runcard and make_opts, try to change -O2 to -O1 in global_flags to fix all SIGFPEs in Frtran (#826) --- epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat | 2 +- epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index 010d2376bf..90222d44c3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -92,7 +92,7 @@ #********************************************************************* # Compilation flag. #********************************************************************* - -O2 -ffast-math -fbounds-check -fno-tree-vectorize -g = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) + -O1 -ffast-math -fbounds-check -fno-tree-vectorize -g = global_flag ! 
build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) --fast-math = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' -O3 = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' 16384 = vector_size ! size of fortran arrays allocated in the multi-event API for SIMD/GPU (VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts index ed8e179af2..ad917c5fde 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts @@ -1,7 +1,7 @@ DEFAULT_CPP_COMPILER=g++ DEFAULT_F2PY_COMPILER=f2py3 DEFAULT_F_COMPILER=gfortran -GLOBAL_FLAG=-O2 -ffast-math -fbounds-check -fno-tree-vectorize -g +GLOBAL_FLAG=-O1 -ffast-math -fbounds-check -fno-tree-vectorize -g MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled From bed2e12ae4aae204b09b42b0c090d41165d78c5f Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Fri, 31 May 2024 16:47:59 +0200 Subject: [PATCH 11/40] try to fix the segfault on issue 826 --- MG5aMC/mg5amcnlo | 2 +- .../iolibs/template_files/gpu/coloramps.h | 7 +++- .../gpu/process_sigmaKin_function.inc | 4 +-- .../CUDACPP_SA_OUTPUT/model_handling.py | 34 +++++++++++++++++-- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index f9f9579188..10378b3c09 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit f9f95791884846ce82b5bf7997726222d8ffbe5e +Subproject commit 10378b3c0971e1a241fd9dc365e592c92d1f13ba diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h index 3972cf8e9a..1a11eb0e11 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h @@ -6,10 +6,15 @@ #ifndef COLORAMPS_H #define COLORAMPS_H 1 +#include + namespace mgOnGpu { - __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { + __device__ std::map diag_to_channel = { + %(diag_to_channel)s + }; + __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { %(is_LC)s }; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index 1ac5ecb303..1b12e4007b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -72,7 +72,7 @@ // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = channelId - 1; // coloramps.h uses the C array indexing starting at 0 + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id fptype targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { @@ -187,7 +187,7 @@ // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = channelId - 1; // coloramps.h uses the C array indexing starting at 0 + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id fptype_sv targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 53213121d9..3e685227dc 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1433,8 +1433,7 @@ def generate_process_files(self): self.edit_check_sa() self.edit_mgonGPU() self.edit_processidfile() # AV new file (NB this is Sigma-specific, should not be a symlink to Subprocesses) - if self.include_multi_channel: - self.edit_coloramps() # AV new file (NB this is Sigma-specific, should not be a symlink to Subprocesses) + self.edit_testxxx() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific) self.edit_memorybuffers() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific) self.edit_memoryaccesscouplings() # AV new file (NB this is generic in Subprocesses and then linked in Sigma-specific) @@ -1512,19 +1511,47 @@ def edit_processidfile(self): ff.write(template % replace_dict) ff.close() + + def generate_subprocess_directory_end(self, **opt): + """ opt contain all local variable of the fortran original function""" + if self.include_multi_channel: + #self.edit_coloramps() # AV new file (NB this is Sigma-specific, should not be a symlink to Subprocesses) + subproc_diagrams_for_config = opt['subproc_diagrams_for_config'] + misc.sprint(len(subproc_diagrams_for_config)) + self.edit_coloramps( subproc_diagrams_for_config) + # AV - new method - def edit_coloramps(self): + def edit_coloramps(self, config_subproc_map): """Generate coloramps.h""" + + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') template = open(pjoin(self.template_path,'gpu','coloramps.h'),'r').read() ff = open(pjoin(self.path, 'coloramps.h'),'w') # The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines) replace_dict={} + + lines = [] + # Output only configs that have some corresponding diagrams + 
iconfig = 0 + for config in config_subproc_map: + if set(config) == set([0]): + continue + lines.append(" {%i, %i}," % (config[0], iconfig)) + iconfig += 1 + replace_dict['diag_to_channel'] = '\n'.join(lines) + misc.sprint(replace_dict) + if self.include_multi_channel: # NB unnecessary as edit_coloramps is not called otherwise... multi_channel = self.get_multi_channel_dictionary(self.matrix_elements[0].get('diagrams'), self.include_multi_channel) replace_dict['is_LC'] = self.get_icolamp_lines(multi_channel, self.matrix_elements[0], 1) replace_dict['nb_channel'] = len(multi_channel) replace_dict['nb_color'] = max(1,len(self.matrix_elements[0].get('color_basis'))) + + misc.sprint(multi_channel) + misc.sprint(self.path, os.getcwd()) + #raise Exception + # AV extra formatting (e.g. gg_tt was "{{true,true};,{true,false};,{false,true};};") replace_dict['is_LC'] = replace_dict['is_LC'].replace(',',', ').replace('{{',' { ').replace('};, {',' },\n { ').replace('};};',' }') ff.write(template % replace_dict) @@ -1676,6 +1703,7 @@ def get_reset_jamp_lines(self, color_amplitudes): if ret_lines != '' : ret_lines = ' // Reset jamp (reset color flows)\n' + ret_lines # AV THIS SHOULD NEVER HAPPEN! 
return ret_lines + #------------------------------------------------------------------------------------ import madgraph.core.helas_objects as helas_objects From 41ddc38569b8e40b3e10b1a5fe43ee0486dc8810 Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Fri, 31 May 2024 17:25:25 +0200 Subject: [PATCH 12/40] fix a issue for omp compilation --- .../iolibs/template_files/gpu/process_sigmaKin_function.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index 1b12e4007b..60f44b907d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -115,7 +115,7 @@ // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp +#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp,mgOnGpu::diag_to_channel #else #define _OMPLIST1 #endif From d34c2c21338abe59b8b8501a9f24b47e0db0bd33 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:18:46 +0200 Subject: [PATCH 13/40] [susy] revert the five recent WIP attempts to debug #826 in susy_gg_t1t1.mad run_card.dat and make_opts Revert "[susy] in susy_gg_t1t1.mad runcard and make_opts, try to change -O2 to -O1 in global_flags to fix all SIGFPEs in Frtran (#826)" This reverts commit 7b160e8f342c9dc5eb9b3c0d0719c417dbc44436. 
Revert "[susy] in susy_gg_t1t1.mad runcard and make_opts, add -g in global_flags to debug SIGFPEs in Frtran (#826)" This reverts commit dd4df95d98c42cd9bbc9cc11fbad4396893b2464. Revert "[susy] in susy_gg_t1t1.mad runcard and make_opts, try to change -O3 to -O2 in global_flags to fix all SIGFPEs in Frtran (#826)" This reverts commit 40ef9530894e5db37e6d8fbf398cda14879a4f33. Revert "[susy] in susy_gg_t1t1.mad runcard, try to add -fno-tree-vectorize to global flags to fix a SIGFPE in Fortran rotxx (see #826)" This reverts commit 6d3ebbc942557891520cf47e28d4b1e6fc74bd14. Revert "[susy] in susy_gg_t1t1.mad make_opts, try to add -fno-tree-vectorize to global flags to fix a SIGFPE in Fortran rotxx (see #826)" This reverts commit b01019813a31006bf4e73cbcd3addc96f00eeb14. --- epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat | 2 +- epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index 90222d44c3..57e8e92627 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -92,7 +92,7 @@ #********************************************************************* # Compilation flag. #********************************************************************* - -O1 -ffast-math -fbounds-check -fno-tree-vectorize -g = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) + -O3 -ffast-math -fbounds-check = global_flag ! build flags for all Fortran code (for a fair comparison to cudacpp; default is -O) --fast-math = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' -O3 = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' 16384 = vector_size ! 
size of fortran arrays allocated in the multi-event API for SIMD/GPU (VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts index ad917c5fde..e4b87ee6ad 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/make_opts @@ -1,7 +1,7 @@ DEFAULT_CPP_COMPILER=g++ DEFAULT_F2PY_COMPILER=f2py3 DEFAULT_F_COMPILER=gfortran -GLOBAL_FLAG=-O1 -ffast-math -fbounds-check -fno-tree-vectorize -g +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled From 335d6d21d3d8e5e8542adf2f69fd3b458a12554b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:21:08 +0200 Subject: [PATCH 14/40] [susy] regenerate susy_gg_t1t1.mad, check that all is ok - prepare to merge Olivier's fix --- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 42e364fe16..98bfa8fbbd 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,19 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.116 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.117 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.198 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.192 s +ALOHA: aloha creates 6 routines in 0.193 s VVV1 VSS1 VSS1 @@ -645,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.408s -user 0m2.655s -sys 0m0.256s +real 0m2.960s +user 0m2.686s +sys 0m0.271s Code generation completed in 3 seconds ************************************************************ * * From f77b90310c3623cdc8e5d7fffd1ece6f0bc4d9da Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:27:03 +0200 Subject: [PATCH 15/40] [susy] in CODEGEN process_sigmaKin_function.inc, fix clang formatting in Olivier's patch PR #852 for issue #826 in susy_gg_t1t1 --- .../template_files/gpu/process_sigmaKin_function.inc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc index 60f44b907d..3e026d553c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_sigmaKin_function.inc @@ -4,7 +4,7 @@ ! Copyright (C) 2020-2024 CERN and UCLouvain. ! Licensed under the GNU Lesser General Public License (version 3 or later). ! Modified by: A. Valassi (Sep 2021) for the MG5aMC CUDACPP plugin. -! Further modified by: J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. +! Further modified by: O. Mattelaer, J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. 
!========================================================================== #include "GpuAbstraction.h" @@ -72,7 +72,7 @@ // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id fptype targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { @@ -115,7 +115,7 @@ // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp,mgOnGpu::diag_to_channel +#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp, mgOnGpu::diag_to_channel #else #define _OMPLIST1 #endif @@ -187,7 +187,7 @@ // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id fptype_sv targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { From 1485eb371dc55a4900f4dde7b2115fba0a768b57 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:31:07 +0200 Subject: [PATCH 16/40] [susy] regenerate susy_gg_t1t1.mad with Olivier's patch PR #852 for issue #826 --- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 23 +++++++++++-------- .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc | 6 ++--- 
.../SubProcesses/P1_gg_t1t1x/coloramps.h | 11 ++++++++- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 98bfa8fbbd..0949f191e0 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.132 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,13 +592,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.117 s +DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [export_v4.py at line 6438]  +DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1520]  +DEBUG: replace_dict =  {'diag_to_channel': ' {2, 0},\n {3, 1},\n {4, 2},\n {5, 3},\n {6, 4},'} [model_handling.py at line 1543]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5]} [model_handling.py at line 1551]  +DEBUG: self.path, os.getcwd() =  . /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [model_handling.py at line 1552]  +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s +Wrote files for 16 helas calls in 0.130 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.197 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -645,10 +650,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.960s -user 0m2.686s -sys 0m0.271s -Code generation completed in 3 seconds +real 0m3.863s +user 0m2.681s +sys 0m0.265s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index af8b10e407..71948806bd 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -1009,7 +1009,7 @@ namespace mg5amcCpu // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = channelId - 1; // coloramps.h uses the C array indexing starting at 0 + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id fptype targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { @@ -1052,7 +1052,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp +#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp, mgOnGpu::diag_to_channel #else #define _OMPLIST1 #endif @@ -1124,7 +1124,7 @@ namespace mg5amcCpu // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = channelId - 1; // coloramps.h uses the C array indexing starting at 0 + const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel 
ordering not the diagram id fptype_sv targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index dd9c9415de..72ee9e0ea7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -6,10 +6,19 @@ #ifndef COLORAMPS_H #define COLORAMPS_H 1 +#include + namespace mgOnGpu { - __device__ constexpr bool icolamp[5][2] = { + __device__ std::map diag_to_channel = { + {2, 0}, + {3, 1}, + {4, 2}, + {5, 3}, + {6, 4}, + }; + __device__ constexpr bool icolamp[5][2] = { { true, true }, { true, true }, { true, false }, From c8fc741c87f2fe47e3615f8e79137ee87c054b44 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:32:47 +0200 Subject: [PATCH 17/40] [susy] in susy_gg_t1t1.mad coloramps.h, improve visual formatting (strangely, clang formatting was not complaining?) 
--- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 72ee9e0ea7..2477b5db85 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -11,14 +11,15 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { - {2, 0}, - {3, 1}, - {4, 2}, - {5, 3}, - {6, 4}, + __device__ std::map diag_to_channel = { + {2, 0}, + {3, 1}, + {4, 2}, + {5, 3}, + {6, 4}, }; - __device__ constexpr bool icolamp[5][2] = { + + __device__ constexpr bool icolamp[5][2] = { { true, true }, { true, true }, { true, false }, From 05265af3b569388dba0e055d9351f5fa93f66589 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:34:59 +0200 Subject: [PATCH 18/40] [susy] in CODEGEN coloramps.h, improve visual formatting (strangely, clang formatting was not complaining?) 
--- .../madgraph/iolibs/template_files/gpu/coloramps.h | 9 +++++---- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 10 +++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h index 1a11eb0e11..3210626126 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h @@ -11,10 +11,11 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { - %(diag_to_channel)s - }; - __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { + __device__ std::map diag_to_channel = { +%(diag_to_channel)s + }; + + __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { %(is_LC)s }; diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 2477b5db85..169302312b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -12,11 +12,11 @@ namespace mgOnGpu { __device__ std::map diag_to_channel = { - {2, 0}, - {3, 1}, - {4, 2}, - {5, 3}, - {6, 4}, + { 2, 0 }, + { 3, 1 }, + { 4, 2 }, + { 5, 3 }, + { 6, 4 }, }; __device__ constexpr bool icolamp[5][2] = { From 8bdbfe226374c6e619fae4b39bfd341aca32c5b9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:37:21 +0200 Subject: [PATCH 19/40] [susy] in CODEGEN/checkFormatting.sh, add coloramps.h to the list of files checked with clang formatting --- epochX/cudacpp/CODEGEN/checkFormatting.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/checkFormatting.sh b/epochX/cudacpp/CODEGEN/checkFormatting.sh index 
a016a79582..a6db392557 100755 --- a/epochX/cudacpp/CODEGEN/checkFormatting.sh +++ b/epochX/cudacpp/CODEGEN/checkFormatting.sh @@ -37,7 +37,7 @@ function checkProcdir() cd $TOPDIR if [ ! -d $procdir ]; then echo "ERROR! Directory not found $TOPDIR/$procdir"; exit 1; fi # Define the list of files to be checked - files=$(\ls $procdir/src/*.cc $procdir/src/*.h $procdir/SubProcesses/*.cc $procdir/SubProcesses/*.h $procdir/SubProcesses/P*/check_sa.cc $procdir/SubProcesses/P*/CPPProcess.cc $procdir/SubProcesses/P*/CPPProcess.h $procdir/SubProcesses/P*/epoch_process_id.h) + files=$(\ls $procdir/src/*.cc $procdir/src/*.h $procdir/SubProcesses/*.cc $procdir/SubProcesses/*.h $procdir/SubProcesses/P*/check_sa.cc $procdir/SubProcesses/P*/CPPProcess.cc $procdir/SubProcesses/P*/CPPProcess.h $procdir/SubProcesses/P*/epoch_process_id.h $procdir/SubProcesses/P*/coloramps.h) if [ "$files" == "" ]; then echo "ERROR! No files to check found in directory $TOPDIR/$procdir"; exit 1; fi # Check each file status=0 From 7a9258cb8a770ed21268109613f08ae60876db3f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:41:57 +0200 Subject: [PATCH 20/40] [susy] in susy_gg_t1t1.mad coloramps.h, add a comment that a trailing comma is allowed in the std::map initializer list --- .../susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 169302312b..7dac6a66b4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -16,7 +16,7 @@ namespace mgOnGpu { 3, 1 }, { 4, 2 }, { 5, 3 }, - { 6, 4 }, + { 6, 4 }, // note: a trailing comma in the initializer list is allowed }; __device__ constexpr bool icolamp[5][2] = { From 4521b6527f7ee5029a8239e978a3aaf139a8c519 Mon Sep 17 00:00:00 2001 
From: Andrea Valassi Date: Fri, 31 May 2024 18:52:00 +0200 Subject: [PATCH 21/40] [susy] in CODEGEN coloramps.h, add a comment that a trailing comma is allowed in the std::map initializer > --- .../madgraph/iolibs/template_files/gpu/coloramps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h index 3210626126..0edad6fbb8 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h @@ -12,7 +12,7 @@ namespace mgOnGpu { __device__ std::map diag_to_channel = { -%(diag_to_channel)s +%(diag_to_channel)s // note: a trailing comma in the initializer list is allowed }; __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { From db56226f82bbd97cd317b0ad394c8c692dd1c812 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:53:27 +0200 Subject: [PATCH 22/40] [susy] in CODEGEN, fix clang formatting for coloramps.h --- .../madgraph/iolibs/template_files/gpu/coloramps.h | 5 ++--- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h index 0edad6fbb8..60e02866da 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h @@ -11,13 +11,12 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { + __device__ std::map diag_to_channel = { %(diag_to_channel)s // note: a trailing comma in the 
initializer list is allowed }; __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { -%(is_LC)s - }; +%(is_LC)s }; } #endif // COLORAMPS_H diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 3e685227dc..169f4a1d49 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1537,7 +1537,7 @@ def edit_coloramps(self, config_subproc_map): for config in config_subproc_map: if set(config) == set([0]): continue - lines.append(" {%i, %i}," % (config[0], iconfig)) + lines.append(" { %i, %i }," % (config[0], iconfig)) iconfig += 1 replace_dict['diag_to_channel'] = '\n'.join(lines) misc.sprint(replace_dict) From ade95e86ffe24c9adbb2a624b78f7ef6dca43bd1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 18:55:32 +0200 Subject: [PATCH 23/40] [susy] regenerate susy_gg_t1t1.mad after fixing clang formatting, all ok Note: cuda compilation 'make -j BACKEND=cuda' fails with coloramps.h(14): error: dynamic initialization is not supported for a __device__ variable --- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 22 +++++++++---------- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 7 +++--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 0949f191e0..329a221454 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.131 s +1 processes with 6 diagrams generated in 0.132 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -594,22 +594,22 @@ INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [export_v4.py at line 6438]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1520]  -DEBUG: replace_dict =  {'diag_to_channel': ' {2, 0},\n {3, 1},\n {4, 2},\n {5, 3},\n {6, 4},'} [model_handling.py at line 1543]  +DEBUG: replace_dict =  {'diag_to_channel': ' { 2, 0 },\n { 3, 1 },\n { 4, 2 },\n { 5, 3 },\n { 6, 4 },'} [model_handling.py at line 1543]  DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5]} [model_handling.py at line 1551]  DEBUG: self.path, os.getcwd() =  . 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [model_handling.py at line 1552]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.130 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.131 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.197 s +ALOHA: aloha creates 3 routines in 0.202 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.193 s +ALOHA: aloha creates 6 routines in 0.198 s VVV1 VSS1 VSS1 @@ -650,10 +650,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.863s -user 0m2.681s -sys 0m0.265s -Code generation completed in 4 seconds +real 0m3.033s +user 0m2.719s +sys 0m0.266s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 7dac6a66b4..0f4a0978a8 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -11,21 +11,20 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { + __device__ std::map diag_to_channel = { { 2, 0 }, { 3, 1 }, { 4, 2 }, { 5, 3 }, { 6, 4 }, // note: a trailing comma in the initializer list is allowed - }; + }; __device__ constexpr bool icolamp[5][2] = { { true, true }, { true, true }, { true, false }, { true, false }, - { false, true } - }; + { false, true } }; } #endif // COLORAMPS_H From 38b3afd4aa05c13fc0d840d38893d9d56b912fa8 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 19:31:47 +0200 Subject: [PATCH 24/40] [susy] in susy_gg_t1t1.mad, try to fix coloramps.h by defining the map as constexpr, this fails with a different error ccache /usr/local/cuda-12.0/bin/nvcc -I. 
-I../../src -Xcompiler -O3 -gencode arch=compute_70,code=compute_70 -gencode arch=compute_70,code=sm_70 -lineinfo -use_fast_math -I/usr/local/cuda-12.0/include/ -DUSE_NVTX -std=c++17 -ccbin /usr/lib64/ccache/g++ -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -Xcompiler -fPIC -c -x cu CPPProcess.cc -o CPPProcess_cuda.o coloramps.h(14): error: expression must have a constant value coloramps.h(14): note #2703-D: cannot call non-constexpr function "std::map<_Key, _Tp, _Compare, _Alloc>::map(std::initializer_list::value_type>, const _Compare &, const std::map<_Key, _Tp, _Compare, _Alloc>::allocator_type &) [with _Key=int, _Tp=int, _Compare=std::less, _Alloc=std::allocator>]" /usr/include/c++/11/bits/stl_map.h(228): here coloramps.h(14): error: a constexpr variable must have a literal type or a reference type CPPProcess.cc(1012): error: no operator "[]" matches these operands operand types are: const std::map, std::allocator>> [ const unsigned int ] 3 errors detected in the compilation of "CPPProcess.cc". 
--- .../susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 0f4a0978a8..4ac3f6f0e9 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -11,7 +11,7 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { + __device__ constexpr std::map diag_to_channel = { { 2, 0 }, { 3, 1 }, { 4, 2 }, From d3c29a0ac36ac0d8ed0bbbb266d4bb305de252ba Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 19:41:39 +0200 Subject: [PATCH 25/40] [susy] in susy_gg_t1t1.mad, revert the attempt to fix coloramps.h by making the map constexpr Revert "[susy] in susy_gg_t1t1.mad, try to fix coloramps.h by defining the map as constexpr, this fails with a different error" This reverts commit 38b3afd4aa05c13fc0d840d38893d9d56b912fa8. 
--- .../susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 4ac3f6f0e9..0f4a0978a8 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -11,7 +11,7 @@ namespace mgOnGpu { - __device__ constexpr std::map diag_to_channel = { + __device__ std::map diag_to_channel = { { 2, 0 }, { 3, 1 }, { 4, 2 }, From 5a2a6e9c3ff981ad635eef811b48dea8fb38f80f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 19:46:21 +0200 Subject: [PATCH 26/40] [susy] in susy_gg_t1t1.mad, try to fix coloramps.h by changing the map into a constexpr int array --- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 0f4a0978a8..abfedaf4c9 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -11,12 +11,14 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { - { 2, 0 }, - { 3, 1 }, - { 4, 2 }, - { 5, 3 }, - { 6, 4 }, // note: a trailing comma in the initializer list is allowed + __device__ constexpr int diag_to_channel[7] = { + -1, // 0 --> None + -1, // 1 --> None + +0, // 2 --> 0 + +1, // 3 --> 1 + +2, // 4 --> 2 + +3, // 5 --> 3 + +4 // 6 --> 4 }; __device__ constexpr bool icolamp[5][2] = { From 340baab2a17168b5fd85de41df202a83f729f36e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 20:15:59 +0200 Subject: [PATCH 27/40] [susy] in CODEGEN, try to fix coloramps.h by changing the map into a constexpr int array --- 
.../iolibs/template_files/gpu/coloramps.h | 4 ++-- .../PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h index 60e02866da..fae5cb3cf2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/coloramps.h @@ -11,8 +11,8 @@ namespace mgOnGpu { - __device__ std::map diag_to_channel = { -%(diag_to_channel)s // note: a trailing comma in the initializer list is allowed + __device__ constexpr int diag_to_channel[%(nb_diagmax)s] = { +%(diag_to_channel)s }; __device__ constexpr bool icolamp[%(nb_channel)s][%(nb_color)s] = { diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 169f4a1d49..15efa9fb23 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1531,14 +1531,22 @@ def edit_coloramps(self, config_subproc_map): # The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines) replace_dict={} - lines = [] - # Output only configs that have some corresponding diagrams + # Channel numbers are only defined for configs that have some corresponding diagrams + diag_to_channel = {} iconfig = 0 for config in config_subproc_map: - if set(config) == set([0]): - continue - lines.append(" { %i, %i }," % (config[0], iconfig)) + if set(config) == set([0]): continue + if config[0] in diag_to_channel: raise Exception( 'Internal error while generating coloramps.h:', config[0], 'is already in', diag_to_channel ) + 
diag_to_channel[config[0]] = iconfig iconfig += 1 + nb_diagmax = max(diag_to_channel.keys()) + 1 + replace_dict['nb_diagmax'] = nb_diagmax + lines = [] + for idiag in range( nb_diagmax ): + if idiag == nb_diagmax-1: sep=' // ' + else: sep=', // ' + if idiag in diag_to_channel: lines.append(" %+i%s%i --> %s"%( diag_to_channel[idiag], sep, idiag, diag_to_channel[idiag] ) ) + else: lines.append(" %+i%s%i --> %s"%( -1, sep, idiag, 'None' ) ) replace_dict['diag_to_channel'] = '\n'.join(lines) misc.sprint(replace_dict) From a04a900b0ae6c36a103c85f10cb53c4e3f2c4382 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 20:22:34 +0200 Subject: [PATCH 28/40] [susy] regenerate susy_gg_t1t1.mad with the latest CODEGEN, all ok (includes clang format fixes) This code now compiles... but it does not seem to fix the empty cross section problem --- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 20 +++++++++---------- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 329a221454..f364bcf320 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -594,22 +594,22 @@ INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [export_v4.py at line 6438]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1520]  -DEBUG: replace_dict =  {'diag_to_channel': ' { 2, 0 },\n { 3, 1 },\n { 4, 2 },\n { 5, 3 },\n { 6, 4 },'} [model_handling.py at line 1543]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5]} [model_handling.py at line 1551]  -DEBUG: self.path, os.getcwd() =  . /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [model_handling.py at line 1552]  +DEBUG: replace_dict =  {'nb_diagmax': 7, 'diag_to_channel': ' -1, // 0 --> None\n -1, // 1 --> None\n +0, // 2 --> 0\n +1, // 3 --> 1\n +2, // 4 --> 2\n +3, // 5 --> 3\n +4 // 6 --> 4'} [model_handling.py at line 1551]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5]} [model_handling.py at line 1559]  +DEBUG: self.path, os.getcwd() =  . 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [model_handling.py at line 1560]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.131 s +Wrote files for 16 helas calls in 0.132 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.202 s +ALOHA: aloha creates 3 routines in 0.209 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.198 s +ALOHA: aloha creates 6 routines in 0.194 s VVV1 VSS1 VSS1 @@ -650,9 +650,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.033s -user 0m2.719s -sys 0m0.266s +real 0m2.979s +user 0m2.696s +sys 0m0.270s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index abfedaf4c9..569859d6a7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -18,7 +18,7 @@ namespace mgOnGpu +1, // 3 --> 1 +2, // 4 --> 2 +3, // 5 --> 3 - +4 // 6 --> 4 + +4 // 6 --> 4 }; __device__ constexpr bool icolamp[5][2] = { From 3da9868738b1f038f2cda8cbedbfe9549a0225c5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 31 May 2024 20:25:49 +0200 Subject: [PATCH 29/40] [susy] rerun tmad test for susy_gg_t1t1: still no cross section (#826 is NOT fixed) after Olivier's PR #852 patch ./tmad/teeMadX.sh -susyggt1t1 +10x -makeclean --- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index e20853bb7a..8baf157d03 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-30_11:47:06 +DATE: 2024-05-31_20:25:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4256s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4161s - [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4073s + [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.80E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > 
/tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s + [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.4297s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3279s - [COUNTERS] Fortran MEs ( 1 ) : 0.1019s for 90112 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4099s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3097s + [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- From 4454d2e079cf3dc4577c719ef99da87b9426739d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 1 Jun 2024 15:02:55 +0200 Subject: [PATCH 30/40] [susy] in tmad/madX.sh, use channelId=1 by default but use a different channelId for susy_gg_t1t1 (fix issue #826) --- epochX/cudacpp/tmad/madX.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index eaeaf654fc..b9ef08593f 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -257,6 +257,7 @@ function getgridmax() # Create an input file that is appropriate for the specific process function getinputfile() { + channelId=1 # use channelId=1 by default nevt=$(getnevt) 
tmpdir=/tmp/$USER mkdir -p $tmpdir @@ -280,6 +281,7 @@ function getinputfile() tmp=$tmpdir/input_susyggtt elif [ "${susyggt1t1}" == "1" ]; then tmp=$tmpdir/input_susyggt1t1 + channelId=3 # channelId=1 does not exist in susyggt1t1 (issue #826) elif [ "${smeftggtttt}" == "1" ]; then tmp=$tmpdir/input_smeftggtttt else @@ -308,7 +310,7 @@ ${nevt} 1 1 ! Number of events and max and min iterations 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +${channelId} ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) EOF echo ${tmp} } From 6e76ade167fabacc74e6e1c8ed146b5bf3cd6038 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 1 Jun 2024 20:28:02 +0200 Subject: [PATCH 31/40] [susy] improve comments and variable names in tmad/madX.sh: iconfig not channelId (and note that iconfig=1 is ok) --- epochX/cudacpp/tmad/madX.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index b9ef08593f..d81435a27b 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -257,7 +257,7 @@ function getgridmax() # Create an input file that is appropriate for the specific process function getinputfile() { - channelId=1 # use channelId=1 by default + iconfig=1 # use iconfig=1 by default (NB: this does not mean channel_id=1 i.e. 
the first diagram, see #826) nevt=$(getnevt) tmpdir=/tmp/$USER mkdir -p $tmpdir @@ -281,7 +281,7 @@ function getinputfile() tmp=$tmpdir/input_susyggtt elif [ "${susyggt1t1}" == "1" ]; then tmp=$tmpdir/input_susyggt1t1 - channelId=3 # channelId=1 does not exist in susyggt1t1 (issue #826) + iconfig=2 # try to use a different iconfig in susyggt1t1 (issue #826) elif [ "${smeftggtttt}" == "1" ]; then tmp=$tmpdir/input_smeftggtttt else @@ -310,7 +310,7 @@ ${nevt} 1 1 ! Number of events and max and min iterations 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -${channelId} ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +${iconfig} ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) EOF echo ${tmp} } From 02120437d6b7fed4df3b87ace4f8ab7ab92faa44 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 2 Jun 2024 07:33:29 +0200 Subject: [PATCH 32/40] [susy] in tmad/madX.sh, keep iconfig=1 for the moment also for susy_gg_t1t1 (will give zero cross section #826) --- epochX/cudacpp/tmad/madX.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index d81435a27b..03ed6d08f0 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -281,7 +281,7 @@ function getinputfile() tmp=$tmpdir/input_susyggtt elif [ "${susyggt1t1}" == "1" ]; then tmp=$tmpdir/input_susyggt1t1 - iconfig=2 # try to use a different iconfig in susyggt1t1 (issue #826) + ###iconfig=2 # try to use a different iconfig in susyggt1t1 (issue #826) elif [ "${smeftggtttt}" == "1" ]; then tmp=$tmpdir/input_smeftggtttt else From 3208fd94690054b36726b315e5b93880c4179b8e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 2 Jun 2024 08:18:45 +0200 Subject: [PATCH 
33/40] [susy] in susy_gg_t1t1.mad, document and clean up the mapping between channelId and iconfig in F and C (#826 and $852) --- .../SubProcesses/P1_gg_t1t1x/CPPProcess.cc | 16 +++--- .../SubProcesses/P1_gg_t1t1x/coloramps.h | 49 ++++++++++++------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index 71948806bd..d668b26dc6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -1009,7 +1009,9 @@ namespace mg5amcCpu // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id + const unsigned int channelIdC = channelId - 1; // channelIdC_to_iconfig in coloramps.h uses the C array indexing starting at 0 + const unsigned int iconfig = mgOnGpu::channelIdC_to_iconfig[channelIdC]; // map N_diagrams to N_config <= N_diagrams configs (see #826 and #852) + const unsigned int iconfigC = iconfig - 1; // icolamp in coloramps.h uses the C array indexing starting at 0 fptype targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { @@ -1017,7 +1019,7 @@ namespace mg5amcCpu targetamp[icolC] = 0; else targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[channelIdC][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; + if( mgOnGpu::icolamp[iconfigC][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; } //printf( "sigmaKin: ievt=%4d rndcol=%f\n", ievt, allrndcol[ievt] ); for( int icolC = 0; icolC < ncolor; icolC++ ) @@ -1052,7 +1054,7 @@ namespace mg5amcCpu // - firstprivate: give each thread its own copy, and initialise with value from outside #define _OMPLIST0 allcouplings, allMEs, allmomenta, 
allrndcol, allrndhel, allselcol, allselhel, cGoodHel, cNGoodHel, npagV2 #ifdef MGONGPU_SUPPORTS_MULTICHANNEL -#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp, mgOnGpu::diag_to_channel +#define _OMPLIST1 , allDenominators, allNumerators, channelId, mgOnGpu::icolamp, mgOnGpu::channelIdC_to_iconfig #else #define _OMPLIST1 #endif @@ -1124,7 +1126,9 @@ namespace mg5amcCpu // Event-by-event random choice of color #402 if( channelId != 0 ) // no event-by-event choice of color if channelId == 0 (fix FPE #783) { - const unsigned int channelIdC = mgOnGpu::diag_to_channel[channelId]; // coloramps.h uses a channel ordering not the diagram id + const unsigned int channelIdC = channelId - 1; // channelIdC_to_iconfig in coloramps.h uses the C array indexing starting at 0 + const unsigned int iconfig = mgOnGpu::channelIdC_to_iconfig[channelIdC]; // map N_diagrams to N_config <= N_diagrams configs (see #826 and #852) + const unsigned int iconfigC = iconfig - 1; // icolamp in coloramps.h uses the C array indexing starting at 0 fptype_sv targetamp[ncolor] = { 0 }; for( int icolC = 0; icolC < ncolor; icolC++ ) { @@ -1132,7 +1136,7 @@ namespace mg5amcCpu targetamp[icolC] = fptype_sv{ 0 }; else targetamp[icolC] = targetamp[icolC - 1]; - if( mgOnGpu::icolamp[channelIdC][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; + if( mgOnGpu::icolamp[iconfigC][icolC] ) targetamp[icolC] += jamp2_sv[icolC]; } #if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT fptype_sv targetamp2[ncolor] = { 0 }; @@ -1142,7 +1146,7 @@ namespace mg5amcCpu targetamp2[icolC] = fptype_sv{ 0 }; else targetamp2[icolC] = targetamp2[icolC - 1]; - if( mgOnGpu::icolamp[channelIdC][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; + if( mgOnGpu::icolamp[iconfigC][icolC] ) targetamp2[icolC] += jamp2_sv[ncolor + icolC]; } #endif for( int ieppV = 0; ieppV < neppV; ++ieppV ) diff --git 
a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h index 569859d6a7..ecdb28e53e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/coloramps.h @@ -1,32 +1,43 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: O. Mattelaer, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. #ifndef COLORAMPS_H #define COLORAMPS_H 1 -#include - -namespace mgOnGpu +namespace mgOnGpu /* clang-format off */ { - - __device__ constexpr int diag_to_channel[7] = { - -1, // 0 --> None - -1, // 1 --> None - +0, // 2 --> 0 - +1, // 3 --> 1 - +2, // 4 --> 2 - +3, // 5 --> 3 - +4 // 6 --> 4 + // Summary of numbering and indexing conventions for the relevant concepts (see issue #826 and PR #852) + // - Diagram number (no variable) in [1, N_diagrams]: all values are allowed (N_diagrams distinct values) + // => this number is displayed for information before each block of code in CPPProcess.cc + // - Channel number ("channelId" in C, CHANNEL_ID in F) in [1, N_diagrams]: not all values are allowed (N_config <= N_diagrams distinct values) + // => this number (with F indexing) is passed around as an API argument between cudacpp functions + // - Channel number in C indexing: "channelIdC" = channelID - 1 + // => this number (with C indexing) is used as the index of the channelIdC_to_iconfig array below + // - Config number ("iconfig" in C, ICONFIG in F) in [1, N_config]: all values are allowed (N_config <= N_diagrams distinct values) + // - Config number in C indexing: "iconfigC" = iconfig - 1 + + // Map channelIdC (in C indexing, i.e. 
channelId-1) to iconfig (in F indexing) + // This array has N_diagrams elements, but only N_config <= N_diagrams valid (non-zero) values + __device__ constexpr int channelIdC_to_iconfig[6] = { + 0, // channelId=1 (diagram=1) i.e. channelIdC=0 --> iconfig=None + 1, // channelId=2 (diagram=2) i.e. channelIdC=1 --> iconfig=1 + 2, // channelId=3 (diagram=3) i.e. channelIdC=2 --> iconfig=2 + 2, // channelId=4 (diagram=4) i.e. channelIdC=3 --> iconfig=3 + 3, // channelId=5 (diagram=5) i.e. channelIdC=4 --> iconfig=4 + 4 // channelId=6 (diagram=6) i.e. channelIdC=5 --> iconfig=5 }; + // Map iconfigC (in C indexing, i.e. iconfig-1) to the set of allowed colors + // This array has N_config <= N_diagrams elements __device__ constexpr bool icolamp[5][2] = { - { true, true }, - { true, true }, - { true, false }, - { true, false }, - { false, true } }; + { true, true }, // iconfig=1 i.e. iconfigC=0 + { true, true }, // iconfig=2 i.e. iconfigC=1 + { true, false }, // iconfig=3 i.e. iconfigC=2 + { true, false }, // iconfig=4 i.e. iconfigC=3 + { false, true } // iconfig=5 i.e. 
iconfigC=4 + }; -} +} /* clang-format on */ #endif // COLORAMPS_H From abccd5ef638d1e737f0d10dc771b3954aa81ca46 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 2 Jun 2024 08:22:18 +0200 Subject: [PATCH 34/40] [susy] rerun susy_gg_t1t1 tmad test - not surprisingly, still no cross section (#826) ./tmad/teeMadX.sh -susyggt1t1 +10x --- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 8baf157d03..5034632118 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]:
Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-31_20:25:23 +DATE: 2024-06-02_08:21:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -47,7 +47,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.4166s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4073s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3981s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.35E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -72,7 +72,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2988s + [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.24E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -97,7 +97,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.4099s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3097s - [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3440s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2472s + [COUNTERS] Fortran MEs ( 1 ) : 0.0968s for 90112 events => throughput is 9.31E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -122,7 +122,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' [OPENMPTH] omp_get_max_threads/nproc = 1/4 From 8d2c82baeb470f0a515dbe95450f5181ab7ddfb1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 26 Jun 2024 12:22:28 +0200 Subject: [PATCH 35/40] [susy] go back to CODEGEN logs from upstream/master to ease merges git checkout upstream/master $(git ls-tree --name-only HEAD */CODEGEN*txt) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 18 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 20 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 ++--- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 24 ++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 24 ++--- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 24 ++--- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +-- .../CODEGEN_mad_heft_gg_bb_log.txt | 16 ++-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 10 +- .../CODEGEN_mad_pp_tt012j_log.txt | 94 +++++++++---------- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 +-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 25 ++--- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 8 +- .../CODEGEN_mad_susy_gg_tt_log.txt | 16 ++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 10 +- 22 files changed, 215 insertions(+), 220 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 91519bd199..01558e97fa 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No 
model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058062076568603516  +DEBUG: model prefixing takes 0.005818367004394531  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,8 +176,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.105 s +Wrote files for 8 helas calls in 0.104 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.212 s +ALOHA: aloha creates 3 routines in 0.213 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.270 s +ALOHA: aloha creates 7 routines in 0.280 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.629s -user 0m1.753s -sys 0m0.211s +real 0m1.985s +user 0m1.747s +sys 0m0.212s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index cfbf379d71..b0fc131d4f 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005816221237182617  +DEBUG: model prefixing takes 0.005814552307128906  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -203,7 +203,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.775s -user 0m0.647s -sys 0m0.057s -Code generation completed in 0 seconds +real 0m0.689s +user 0m0.639s +sys 0m0.045s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index af7830f01e..4da158143b 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005688667297363281  +DEBUG: model prefixing takes 0.0058248043060302734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,16 +194,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.109 s +Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.156 s +ALOHA: aloha creates 2 routines in 0.154 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.142 s +ALOHA: aloha creates 4 routines in 0.139 s VVV1 FFV1 FFV1 @@ -239,10 +239,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.902s -user 0m1.560s -sys 0m0.227s -Code generation completed in 2 seconds +real 0m1.797s +user 0m1.551s +sys 0m0.228s +Code generation completed in 1 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 176649bab7..a4dc00f87b 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005812644958496094  +DEBUG: model prefixing takes 0.005820274353027344  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -198,7 +198,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.561s -user 0m0.496s -sys 0m0.060s -Code generation completed in 1 seconds +real 0m0.635s +user 0m0.505s +sys 0m0.048s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 8c7deb3d21..b3f5ed282a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058057308197021484  +DEBUG: model prefixing takes 0.005791902542114258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -187,8 +187,8 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -204,8 +204,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -221,14 +221,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.256 s +Wrote files for 46 helas calls in 0.259 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.347 s +ALOHA: aloha creates 5 routines in 0.348 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -236,7 +236,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.340 s +ALOHA: aloha creates 10 routines in 0.330 s VVV1 VVV1 FFV1 @@ -285,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.426s -user 0m2.178s -sys 0m0.223s +real 0m2.666s +user 0m2.189s +sys 0m0.225s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index b8b101f03d..f2938af2d2 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005696535110473633  +DEBUG: model prefixing takes 0.005806684494018555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,14 +194,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.158 s +Wrote files for 36 helas calls in 0.157 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.345 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: aloha creates 10 routines in 0.331 s VVV1 VVV1 FFV1 @@ -254,10 +254,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.354s -user 0m2.039s -sys 0m0.226s -Code generation completed in 2 seconds +real 0m2.562s +user 0m2.060s +sys 0m0.221s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index c7633aaea7..2650bec87e 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005728721618652344  +DEBUG: model prefixing takes 0.005831718444824219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -206,7 +206,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.824s -user 0m0.772s -sys 0m0.042s -Code generation completed in 0 seconds +real 0m0.852s +user 0m0.759s +sys 0m0.056s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 9d19df8188..d479b476a0 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005671262741088867  +DEBUG: model prefixing takes 0.00582575798034668  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.166 s +1 processes with 123 diagrams generated in 0.169 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -177,8 +177,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.447 s -Wrote files for 222 helas calls in 0.745 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.460 s +Wrote files for 222 helas calls in 0.741 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.353 s +ALOHA: aloha creates 5 routines in 0.355 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.338 s VVV1 VVV1 FFV1 @@ -257,10 +257,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.779s -user 0m3.182s -sys 0m0.250s -Code generation completed in 4 seconds +real 0m3.505s +user 0m3.236s +sys 0m0.224s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index f9ef67bae5..89a9e25c18 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055997371673583984  +DEBUG: model prefixing takes 0.0057909488677978516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.171 s +1 processes with 123 diagrams generated in 0.170 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.450 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.451 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.525s -user 0m1.447s -sys 0m0.066s -Code generation completed in 2 seconds +real 0m1.544s +user 0m1.463s +sys 0m0.054s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index bcecc00009..da6acfa84a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005766391754150391  +DEBUG: model prefixing takes 0.005862236022949219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.986 s +1 processes with 1240 diagrams generated in 2.007 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -179,8 +179,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,15 +195,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.931 s -Wrote files for 2281 helas calls in 19.490 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 7.000 s +Wrote files for 2281 helas calls in 19.666 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.337 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.330 s VVV1 VVV1 FFV1 @@ -259,10 +259,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m30.786s -user 0m30.251s -sys 0m0.400s -Code generation completed in 30 seconds +real 0m31.035s +user 0m30.488s +sys 0m0.422s +Code generation completed in 32 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 4e2d7c04bd..b0d9872611 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056269168853759766  +DEBUG: model prefixing takes 0.005834102630615234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.991 s +1 processes with 1240 diagrams generated in 2.007 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.900 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 7.027 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -186,7 +186,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.376 s +ALOHA: aloha creates 5 routines in 0.374 s VVV1 VVV1 FFV1 @@ -209,7 +209,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.707s -user 0m13.535s -sys 0m0.118s +real 0m13.929s +user 0m13.764s +sys 0m0.108s Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index db4d1b2f5b..c685148505 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005600929260253906  +DEBUG: model prefixing takes 0.005826234817504883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -200,8 +200,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,8 +217,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -234,16 +234,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.231 s +Wrote files for 32 helas calls in 0.234 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.154 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.140 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.230s -user 0m1.771s -sys 0m0.236s +real 0m2.041s +user 0m1.799s +sys 0m0.227s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 8d830a08ba..497eedfefd 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005650997161865234  +DEBUG: model prefixing takes 0.00579071044921875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -215,7 +215,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.154 s FFV1 FFV1 FFV1 @@ -231,7 +231,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.680s -user 0m0.624s -sys 0m0.051s +real 0m0.690s +user 0m0.614s +sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 99374cf5f8..534d00c329 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -149,8 +149,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -166,20 +166,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.109 s +Wrote files for 12 helas calls in 0.111 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.276 s +ALOHA: aloha creates 4 routines in 0.280 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.474 s +ALOHA: aloha creates 8 routines in 0.264 s VVS3 VVV1 FFV1 @@ -217,9 +217,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.489s -user 0m1.764s -sys 0m0.253s +real 0m2.047s +user 0m1.806s +sys 0m0.230s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index bb27075562..8eeb803fc7 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -157,7 +157,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.283 s +ALOHA: aloha creates 4 routines in 0.277 s VVS3 VVV1 FFV1 @@ -174,7 +174,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.677s -user 0m0.600s -sys 0m0.062s -Code generation completed in 0 seconds +real 0m0.723s +user 0m0.625s +sys 0m0.046s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index beb9862c99..b26d47fafc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058329105377197266  +DEBUG: model prefixing takes 0.005836009979248047  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.144 s +13 processes with 76 diagrams generated in 0.146 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.936 s +65 processes with 1119 diagrams generated in 1.970 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -499,8 +499,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -516,8 +516,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -533,8 +533,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -550,8 +550,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -567,8 +567,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,8 +584,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -601,8 +601,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -618,8 +618,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -635,8 +635,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -652,8 +652,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -669,8 +669,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -686,8 +686,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -703,8 +703,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -720,8 +720,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -737,8 +737,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -754,8 +754,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -771,8 +771,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -788,8 +788,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -804,15 +804,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.367 s -Wrote files for 810 helas calls in 3.438 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.375 s +Wrote files for 810 helas calls in 3.466 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.356 s +ALOHA: aloha creates 5 routines in 0.357 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -820,7 +820,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.331 s +ALOHA: aloha creates 10 routines in 0.333 s VVV1 VVV1 FFV1 @@ -1030,10 +1030,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.387s -user 0m8.804s -sys 0m0.434s -Code generation completed in 9 seconds +real 0m9.404s +user 0m8.903s +sys 0m0.418s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 422db5816a..e171469df6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1452183723449707  +DEBUG: model prefixing takes 0.14738821983337402  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.917 s +1 processes with 72 diagrams generated in 3.935 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -114,8 +114,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -131,14 +131,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s -Wrote files for 119 helas calls in 0.436 s +Wrote files for 119 helas calls in 0.442 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.339 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -146,7 +146,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.350 s +ALOHA: aloha creates 10 routines in 0.355 s VVV5 VVV5 FFV1 @@ -191,9 +191,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.180s -user 0m6.909s -sys 0m0.243s +real 0m7.305s +user 0m6.946s +sys 0m0.253s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 863794fe85..afe7467840 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14494752883911133  +DEBUG: model prefixing takes 0.14697813987731934  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.880 s +1 processes with 72 diagrams generated in 3.942 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.199 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -123,7 +123,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.334 s VVV5 VVV5 FFV1 @@ -143,7 +143,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.347s -user 0m5.256s -sys 0m0.063s +real 0m5.448s +user 0m5.330s +sys 0m0.058s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index f364bcf320..78d37d6c49 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.132 s +1 processes with 6 diagrams generated in 0.130 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -592,24 +592,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [export_v4.py at line 6438]  -DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1520]  -DEBUG: replace_dict =  {'nb_diagmax': 7, 'diag_to_channel': ' -1, // 0 --> None\n -1, // 1 --> None\n +0, // 2 --> 0\n +1, // 3 --> 1\n +2, // 4 --> 2\n +3, // 5 --> 3\n +4 // 6 --> 4'} [model_handling.py at line 1551]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5]} [model_handling.py at line 1559]  -DEBUG: self.path, os.getcwd() =  . 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [model_handling.py at line 1560]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.132 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s +Wrote files for 16 helas calls in 0.117 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.209 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.194 s +ALOHA: aloha creates 6 routines in 0.201 s VVV1 VSS1 VSS1 @@ -650,9 +645,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.979s -user 0m2.696s -sys 0m0.270s +real 0m3.081s +user 0m2.681s +sys 0m0.245s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 18293f5350..4953f08208 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.133 s +1 processes with 6 diagrams generated in 0.131 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -583,7 +583,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.193 s +ALOHA: aloha creates 3 routines in 0.196 s VVV1 VSS1 VSS1 @@ -599,7 +599,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.554s -user 0m1.342s +real 0m1.421s +user 0m1.348s sys 0m0.062s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index c6fc0cbca8..2fb4d8a715 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.124 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -576,8 +576,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1148]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,12 +597,12 @@ Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -638,9 +638,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.972s -user 0m2.522s -sys 0m0.256s +real 0m2.805s +user 0m2.548s +sys 0m0.244s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 57ce689dad..a6c54f90b2 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.124 s +1 processes with 3 diagrams generated in 0.125 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +582,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -597,7 +597,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.489s -user 0m1.276s -sys 0m0.066s +real 0m1.365s +user 0m1.280s +sys 0m0.067s Code generation completed in 1 seconds From d9d898a5d912c26eb4d17ecacfa6f8b7da1d8e5b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 27 Jun 2024 13:02:56 +0200 Subject: [PATCH 36/40] [om852asis] update MG5aMC/mg5amcnlo to e4d9df4ab in gpucpp_826 including my two recent changes in gpucpp --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 10378b3c09..e4d9df4abf 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 10378b3c0971e1a241fd9dc365e592c92d1f13ba +Subproject commit e4d9df4abf0e2753d74f83fec245e5aa4e1d1134 From 3269e6bf3259a8de33e60d8ae6f5ebdc6214c761 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 2 Jun 2024 09:05:04 +0200 Subject: [PATCH 37/40] [tmad] in tmad/madX.sh, add optional argument "-iconfig " to test a different iconfig In particular: the following triggers a SIGFPE reported in #855 (crash in rotxxx that can be fixed adding volatile?) 
./tmad/madX.sh -ggttgg -iconfig 104 -makeclean This also triggers a similar SIGFPE (initially reported in #826) ./tmad/madX.sh -susyggt1t1 -iconfig 2 -makeclean --- epochX/cudacpp/tmad/madX.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index 400789d89c..42de7a7c5f 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -28,7 +28,7 @@ export CUDACPP_RUNTIME_VECSIZEUSED=${NLOOP} function usage() { - echo "Usage: $0 [-d] [-fltonly|-mixonly] [-makeonly|-makeclean|-makecleanonly] [-rmrdat] [+10x] [-checkonly] [-nocleanup]" > /dev/stderr + echo "Usage: $0 [-d] [-fltonly|-mixonly] [-makeonly|-makeclean|-makecleanonly] [-rmrdat] [+10x] [-checkonly] [-nocleanup][-iconfig ]" > /dev/stderr echo "(NB: OMP_NUM_THREADS is taken as-is from the caller's environment)" exit 1 } @@ -64,6 +64,8 @@ checkonly=0 nocleanup=0 +iconfig= + while [ "$1" != "" ]; do if [ "$1" == "-d" ]; then debug=1 @@ -131,6 +133,9 @@ while [ "$1" != "" ]; do elif [ "$1" == "-nocleanup" ]; then nocleanup=1 shift + elif [ "$1" == "-iconfig" ] && [ "$2" != "" ]; then + iconfig=$2 + shift; shift else usage fi @@ -258,7 +263,7 @@ function getgridmax() # Create an input file that is appropriate for the specific process function getinputfile() { - iconfig=1 # use iconfig=1 by default (NB: this does not mean channel_id=1 i.e. the first diagram, see #826) + iconfig_proc=1 # use iconfig=1 by default (NB: this does not mean channel_id=1 i.e. 
the first diagram, see #826) nevt=$(getnevt) tmpdir=/tmp/$USER mkdir -p $tmpdir @@ -282,7 +287,7 @@ function getinputfile() tmp=$tmpdir/input_susyggtt elif [ "${susyggt1t1}" == "1" ]; then tmp=$tmpdir/input_susyggt1t1 - ###iconfig=2 # try to use a different iconfig in susyggt1t1 (issue #826) + ###iconfig_proc=2 # try to use a different iconfig in susyggt1t1 (issue #826) elif [ "${smeftggtttt}" == "1" ]; then tmp=$tmpdir/input_smeftggtttt else @@ -304,6 +309,7 @@ function getinputfile() echo "Usage: getinputfile " exit 1 fi + if [ "${iconfig}" == "" ]; then iconfig=${iconfig_proc}; fi (( nevt = nevt*$xfac )) cat << EOF >> ${tmp} ${nevt} 1 1 ! Number of events and max and min iterations From 0920709a679f50888f8df308a27e4b309f457a8b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 2 Jun 2024 18:52:19 +0200 Subject: [PATCH 38/40] [tmad] in tmad/madX.sh, use iconfig=104 in ggttgg and iconfig=2 in susyggt1t1 to test #855 fix while still exposing #826 and #856 --- epochX/cudacpp/tmad/madX.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/tmad/madX.sh b/epochX/cudacpp/tmad/madX.sh index 42de7a7c5f..1314bf0d60 100755 --- a/epochX/cudacpp/tmad/madX.sh +++ b/epochX/cudacpp/tmad/madX.sh @@ -275,6 +275,7 @@ function getinputfile() tmp=$tmpdir/input_ggttg elif [ "${ggttgg}" == "1" ]; then tmp=$tmpdir/input_ggttgg + iconfig_proc=104 # use iconfig=104 in ggttgg to check #855 SIGFPE fix (but issue #856 is pending: LHE color mismatch!) elif [ "${ggttggg}" == "1" ]; then tmp=$tmpdir/input_ggttggg elif [ "${gguu}" == "1" ]; then @@ -287,7 +288,7 @@ function getinputfile() tmp=$tmpdir/input_susyggtt elif [ "${susyggt1t1}" == "1" ]; then tmp=$tmpdir/input_susyggt1t1 - ###iconfig_proc=2 # try to use a different iconfig in susyggt1t1 (issue #826) + iconfig_proc=2 # use iconfig=2 in susyggt1t1 to check #855 SIGFPE fix (but issue #826 is pending: no cross section!) 
elif [ "${smeftggtttt}" == "1" ]; then tmp=$tmpdir/input_smeftggtttt else From c39b5fbe07785948e0b088b0738b62ca143ebcad Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 28 Jun 2024 00:21:45 +0200 Subject: [PATCH 39/40] [susy] go back to older tmad logs to allow merging upstream/master git checkout 32a5b40aaddbab9b9e42698427d01436ec7cf805 tmad/logs_susyggt* --- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 34 +- .../log_susyggtt_mad_d_inl0_hrd0.txt | 493 +----------------- 2 files changed, 33 insertions(+), 494 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 5034632118..059122dda6 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' 
make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-02_08:21:54 +DATE: 2024-05-16_06:00:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -47,7 +47,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 685 events (found 2208 events) - [COUNTERS] PROGRAM TOTAL : 0.3981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s - [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -72,7 +72,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! 
Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4876 [0.48763077179780701] fbridge_mode=0 [UNWEIGHT] Wrote 648 events (found 1275 events) - [COUNTERS] PROGRAM TOTAL : 0.3077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2988s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -97,7 +97,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' [OPENMPTH] omp_get_max_threads/nproc = 1/4 @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.4762 [0.47620722822826000] fbridge_mode=0 [UNWEIGHT] Wrote 1784 events (found 1789 events) - [COUNTERS] PROGRAM TOTAL : 1.3440s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2472s - [COUNTERS] Fortran MEs ( 1 ) : 0.0968s for 90112 events => throughput is 9.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2771s + [COUNTERS] Fortran MEs ( 1 ) : 0.1002s for 90112 events => throughput is 8.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -122,7 +122,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) 1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) 0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' [OPENMPTH] omp_get_max_threads/nproc = 1/4 diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 7e8cfcc6ca..fd24a61552 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-05-30_11:46:38 +DATE: 2024-05-16_05:59:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7899s - [COUNTERS] Fortran MEs ( 1 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7798s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4282s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3838s - [COUNTERS] Fortran MEs ( 1 ) : 0.0444s for 8192 events => throughput is 
1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8353s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3481s - [COUNTERS] Fortran MEs ( 1 ) : 0.4872s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3278s + [COUNTERS] Fortran MEs ( 1 ) : 0.4852s for 90112 events => throughput is 1.86E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -131,473 +131,12 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4211s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0459s for 8192 events => throughput is 1.78E+05 events/s + [XSECTION] Cross section = 171.8 [171.81273026311101] fbridge_mode=1 + [UNWEIGHT] Wrote 2338 events (found 3965 events) + [COUNTERS] PROGRAM TOTAL : 0.7007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 8192 events => throughput is 2.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (44.598860065419856) and cpp (44.598860065419863) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.8478s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5064s for 90112 events => throughput is 1.78E+05 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.821549e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.828539e+05 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.18E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6042s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2839s for 90112 events => throughput is 3.17E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.236663e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.265774e+05 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3922s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 90112 events => throughput is 5.03E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.982338e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.050150e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4039s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4735s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3097s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1638s for 90112 events => throughput is 5.50E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.390955e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.551520e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4224s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3988s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.46E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2623s for 90112 events => throughput is 3.44E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534035e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.557058e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8606s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.598860065419856) and cuda (44.598860065419849) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7382s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.577523870256456) and cuda (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.847950e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.628809e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.649520e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073840e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.642142e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155791e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.619665e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 
65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.071359e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +ERROR! xsec from fortran (44.598860065419856) and cpp (171.81273026311101) differ by more than 3E-14 (2.852401832941188) From d24723ef7f1fea34d6e49c8d51e7ae9d21579de5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 27 Jun 2024 23:37:53 +0200 Subject: [PATCH 40/40] [om852asis] update MG5aMC/mg5amcnlo to 74fd166c1 in gpucpp_826 including the 'volatile' fix for rotxxx crashes --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index e4d9df4abf..74fd166c1e 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit e4d9df4abf0e2753d74f83fec245e5aa4e1d1134 +Subproject commit 74fd166c1e22bde2dfe01b2e001ac3b177628165