Skip to content

Commit

Permalink
Merge pull request #1012 from valassi/amd
Browse files Browse the repository at this point in the history
workaround for FPE in vxxxxx on HIP (and fixes for v1.00.01 tags)
  • Loading branch information
valassi authored Oct 4, 2024
2 parents 7e8e033 + 0524cd1 commit 5d43720
Show file tree
Hide file tree
Showing 52 changed files with 310 additions and 211 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/archiver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,17 @@ mkdir ${outdir}
outfile=${outdir}/VERSION.txt
touch ${outfile}
dateformat='%Y-%m-%d_%H:%M:%S UTC'
cudacpp_major=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $1}')
cudacpp_minor=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $2}')
cudacpp_patch=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $3}')
###echo "(From CUDACPP_OUTPUT/__init__.py)"
###echo "cudacpp (major, minor, patch) = ( ${cudacpp_major}, ${cudacpp_minor}, ${cudacpp_patch} )"
if [ ${cudacpp_major} -lt 0 ] || [ ${cudacpp_major} -gt 99 ]; then echo "ERROR! cudacpp_major is not in the [0,99] range"; exit 1; fi
if [ ${cudacpp_minor} -lt 0 ] || [ ${cudacpp_minor} -gt 99 ]; then echo "ERROR! cudacpp_minor is not in the [0,99] range"; exit 1; fi
if [ ${cudacpp_patch} -lt 0 ] || [ ${cudacpp_patch} -gt 99 ]; then echo "ERROR! cudacpp_patch is not in the [0,99] range"; exit 1; fi
cudacpp_version=$(printf "%1d.%02d.%02d" ${cudacpp_major} ${cudacpp_minor} ${cudacpp_patch})
echo "(From CUDACPP_OUTPUT/__init__.py)" >> ${outfile}
echo "cudacpp_version = $(cat __init__.py | awk '/__version__/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile}
echo "cudacpp_version = ${cudacpp_version}" >> ${outfile}
echo "mg5_version_minimal = $(cat __init__.py | awk '/minimal_mg5amcnlo_version/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile}
echo "mg5_version_latest_validated = $(cat __init__.py | awk '/latest_validated_version/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile}
echo "" >> ${outfile}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/archiver.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ on:
push:

tags:
# Include version tags such as 'cudacpp_for3.6.0_v1.0.0' or 'cudacpp_for3.6.0_v1.0.0_test001'
# Include version tags such as 'valassi_cudacpp_for3.6.0_v1.0.0' or 'valassi_cudacpp_for3.6.0_v1.0.0_test001'
# Include version tags such as 'cudacpp_for3.6.0_v1.00.00' or 'cudacpp_for3.6.0_v1.00.00_test001'
# Include version tags such as 'valassi_cudacpp_for3.6.0_v1.00.00' or 'valassi_cudacpp_for3.6.0_v1.00.00_test001'
- '*cudacpp_for*_v*'

# Exclude running tags such as 'cudacpp_for3.6.0_latest'
Expand Down
18 changes: 18 additions & 0 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,22 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com).

--------------------------------------------------------------------------------

## [Unreleased] - 2024-10-03

### Changed

- Updated cudacpp version to 1.00.01.

### Fixed

- Platform-specific issues
- AV ([#1011]) Added workaround for Floating Point Exceptions in vxxxxx in the HIP backend.

- Infrastructure issues
- AV ([#1013]) Fix release scripts to create 'v1.00.01' tags from a '(1,0,1)' python tuple.

--------------------------------------------------------------------------------

## [1.00.00] - 2024-10-03

### Added
Expand Down Expand Up @@ -35,6 +51,7 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com).
--------------------------------------------------------------------------------

[1.00.00]: https://github.com/madgraph5/madgraph4gpu/releases/tag/cudacpp_for3.6.0_v1.00.00
[Unreleased]: https://github.com/madgraph5/madgraph4gpu/releases/compare/cudacpp_for3.6.0_v1.00.00...HEAD

[#601]: https://github.com/madgraph5/madgraph4gpu/issues/601
[#846]: https://github.com/madgraph5/madgraph4gpu/issues/846
Expand All @@ -43,3 +60,4 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com).
[#959]: https://github.com/madgraph5/madgraph4gpu/issues/959
[#993]: https://github.com/madgraph5/madgraph4gpu/issues/993
[#1011]: https://github.com/madgraph5/madgraph4gpu/issues/1011
[#1013]: https://github.com/madgraph5/madgraph4gpu/issues/1013
6 changes: 5 additions & 1 deletion epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@
__author__ = 'Andrea Valassi'
__email__ = '[email protected]'

__version__ = (1,00,00) # NB the release infrastructure expects 1-digit major and 2-digit minor and patch versions (n,nn,nn)
# Plugin version (major,minor,patch) where major>=1, 0<=minor<=99 and 0<=patch<=99
# The release infrastructure expects 'vN.NN.NN' tags with 1-digit major and 2-digit minor and patch versions
# and it takes care of converting the python tuple '(1,0,1)' into a version string 'v1.00.01'
# NB! Do not use '(1,00,01)' here: leading zeros in decimal integer literals are not permitted in python (#1013)
__version__ = (1,0,1)

minimal_mg5amcnlo_version = (3,6,0)
maximal_mg5amcnlo_version = (1000,1000,1000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,10 @@
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
16 changes: 8 additions & 8 deletions epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006308317184448242 
DEBUG: model prefixing takes 0.006434440612792969 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -150,7 +150,7 @@ INFO: Checking for minimal orders which gives processes.
INFO: Please specify coupling orders to bypass this step.
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
INFO: Process has 2 diagrams
1 processes with 2 diagrams generated in 0.005 s
1 processes with 2 diagrams generated in 0.004 s
Total: 1 processes with 2 diagrams
output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32
Output will be done with PLUGIN: CUDACPP_OUTPUT
Expand Down Expand Up @@ -180,19 +180,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1551] 
DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1552] 
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
Wrote files for 8 helas calls in 0.070 s
Wrote files for 8 helas calls in 0.069 s
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates 3 routines in 0.199 s
ALOHA: aloha creates 3 routines in 0.201 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 7 routines in 0.252 s
ALOHA: aloha creates 7 routines in 0.255 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand Down Expand Up @@ -232,9 +232,9 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m2.042s
user 0m1.792s
sys 0m0.243s
real 0m2.097s
user 0m1.775s
sys 0m0.272s
Code generation completed in 2 seconds
************************************************************
* *
Expand Down
5 changes: 4 additions & 1 deletion epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ namespace mg5amcCpu
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
16 changes: 8 additions & 8 deletions epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006402015686035156 
DEBUG: model prefixing takes 0.0062215328216552734 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -150,7 +150,7 @@ INFO: Checking for minimal orders which gives processes.
INFO: Please specify coupling orders to bypass this step.
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
INFO: Process has 2 diagrams
1 processes with 2 diagrams generated in 0.004 s
1 processes with 2 diagrams generated in 0.005 s
Total: 1 processes with 2 diagrams
output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
Output will be done with PLUGIN: CUDACPP_OUTPUT
Expand All @@ -169,13 +169,13 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h
FileWriter <class 'MG5aMC_PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc
INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/.
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 4 routines in 0.265 s
ALOHA: aloha creates 4 routines in 0.267 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand All @@ -194,7 +194,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/.
quit

real 0m0.659s
user 0m0.600s
sys 0m0.042s
Code generation completed in 0 seconds
real 0m0.781s
user 0m0.590s
sys 0m0.053s
Code generation completed in 1 seconds
5 changes: 4 additions & 1 deletion epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ namespace mg5amcCpu
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
12 changes: 6 additions & 6 deletions epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ generate g g > t t~
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006418943405151367 
DEBUG: model prefixing takes 0.0059719085693359375 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -181,12 +181,12 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1551] 
DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1552] 
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
Wrote files for 10 helas calls in 0.071 s
Wrote files for 10 helas calls in 0.072 s
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 set of routines with options: P0
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates 2 routines in 0.153 s
ALOHA: aloha creates 2 routines in 0.150 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 set of routines with options: P0
ALOHA: aloha creates FFV1 routines
Expand Down Expand Up @@ -226,9 +226,9 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m1.900s
user 0m1.626s
sys 0m0.264s
real 0m1.997s
user 0m1.613s
sys 0m0.278s
Code generation completed in 2 seconds
************************************************************
* *
Expand Down
5 changes: 4 additions & 1 deletion epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ namespace mg5amcCpu
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
10 changes: 5 additions & 5 deletions epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ generate g g > t t~
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006226539611816406 
DEBUG: model prefixing takes 0.006254673004150391 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -174,7 +174,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 set of routines with options: P0
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates 2 routines in 0.143 s
ALOHA: aloha creates 2 routines in 0.144 s
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
Expand All @@ -189,7 +189,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/.
quit

real 0m0.559s
user 0m0.480s
sys 0m0.044s
real 0m0.532s
user 0m0.478s
sys 0m0.045s
Code generation completed in 0 seconds
5 changes: 4 additions & 1 deletion epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ namespace mg5amcCpu
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
20 changes: 10 additions & 10 deletions epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ generate g g > t t~
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006343364715576172 
DEBUG: model prefixing takes 0.006289482116699219 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -159,7 +159,7 @@ INFO: Please specify coupling orders to bypass this step.
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED
INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2
INFO: Process has 16 diagrams
1 processes with 16 diagrams generated in 0.020 s
1 processes with 16 diagrams generated in 0.019 s
Total: 2 processes with 19 diagrams
output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32
Output will be done with PLUGIN: CUDACPP_OUTPUT
Expand Down Expand Up @@ -201,23 +201,23 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1527] 
DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1551] 
DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1552] 
Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s
Wrote files for 46 helas calls in 0.191 s
Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s
Wrote files for 46 helas calls in 0.189 s
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 routines
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates VVVV1 set of routines with options: P0
ALOHA: aloha creates VVVV3 set of routines with options: P0
ALOHA: aloha creates VVVV4 set of routines with options: P0
ALOHA: aloha creates 5 routines in 0.326 s
ALOHA: aloha creates 5 routines in 0.338 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 routines
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates VVVV1 set of routines with options: P0
ALOHA: aloha creates VVVV3 set of routines with options: P0
ALOHA: aloha creates VVVV4 set of routines with options: P0
ALOHA: aloha creates 10 routines in 0.306 s
ALOHA: aloha creates 10 routines in 0.311 s
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
<class 'aloha.create_aloha.AbstractRoutine'> VVV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
Expand Down Expand Up @@ -265,10 +265,10 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m2.598s
user 0m2.282s
sys 0m0.314s
Code generation completed in 3 seconds
real 0m2.618s
user 0m2.304s
sys 0m0.310s
Code generation completed in 2 seconds
************************************************************
* *
* W E L C O M E to *
Expand Down
5 changes: 4 additions & 1 deletion epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ namespace mg5amcCpu
}
else
{
const fptype emp = pvec0 / ( vmass * pp );
//printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp );
//const fptype emp = pvec0 / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?)
const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011
//printf( "DEBUG1011 (after emp): emp=%f\n", emp );
vc[2] = cxmake( hel0 * pp / vmass, 0. );
vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. );
if( pt != 0. )
Expand Down
Loading

0 comments on commit 5d43720

Please sign in to comment.