1- MADGRAPH_CUDA_ARCHITECTURE=
1+ MADGRAPH_CUDA_ARCHITECTURE=70
22MADGRAPH_HIP_ARCHITECTURE=
33
44HASBLAS=hasBlas
@@ -16,35 +16,29 @@ make: Nothing to be done for 'gtestlibs'.
1616
1717make USEBUILDDIR=1 BACKEND=cuda
1818make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
19- make[1]: Nothing to be done for 'all'.
2019make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
2120
2221make USEBUILDDIR=1 BACKEND=cppnone
2322make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
24- make[1]: Nothing to be done for 'all'.
2523make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
2624
2725make USEBUILDDIR=1 BACKEND=cppsse4
2826make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
29- make[1]: Nothing to be done for 'all'.
3027make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
3128
3229make USEBUILDDIR=1 BACKEND=cppavx2
3330make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
34- make[1]: Nothing to be done for 'all'.
3531make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
3632
3733make USEBUILDDIR=1 BACKEND=cpp512y
3834make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
39- make[1]: Nothing to be done for 'all'.
4035make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
4136
4237make USEBUILDDIR=1 BACKEND=cpp512z
4338make[1]: Entering directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
44- make[1]: Nothing to be done for 'all'.
4539make[1]: Leaving directory '/data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
4640
47- DATE: 2025-10-11_15:13:43
41+ DATE: 2025-12-07_17:31:39
4842
4943HASBLAS=hasBlas
5044CUDACPP_RUNTIME_BLASCOLORSUM=
@@ -55,14 +49,14 @@ runExe /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubPr
5549Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.5.0)] [inlineHel=0] [hardcodePARAM=0]
5650Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
5751FP precision = DOUBLE (NaN/abnormal=0, zero=0)
58- EvtsPerSec[Rmb+ME] (23) = ( 6.456825e +07 ) sec^-1
59- EvtsPerSec[MatrixElems] (3) = ( 3.020579e +08 ) sec^-1
60- EvtsPerSec[MECalcOnly] (3a) = ( 3.872827e +08 ) sec^-1
52+ EvtsPerSec[Rmb+ME] (23) = ( 6.448256e +07 ) sec^-1
53+ EvtsPerSec[MatrixElems] (3) = ( 3.095942e +08 ) sec^-1
54+ EvtsPerSec[MECalcOnly] (3a) = ( 3.924818e +08 ) sec^-1
6155MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
62- TOTAL : 0.693291 sec
63- 2,729,119,040 cycles # 2.827 GHz
64- 4,039,185,150 instructions # 1.48 insn per cycle
65- 1.043410313 seconds time elapsed
56+ TOTAL : 0.779644 sec
57+ 2,832,606,212 cycles # 2.888 GHz
58+ 4,254,803,118 instructions # 1.50 insn per cycle
59+ 1.394928839 seconds time elapsed
6660.........................................................................
6761runNcu /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1
6862==PROF== Profiling "calculate_jamps": launch__registers_per_thread 144
@@ -89,14 +83,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.5.0] [inlineHel=0
8983Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
9084FP precision = DOUBLE (NaN/abnormal=0, zero=0)
9185Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
92- EvtsPerSec[Rmb+ME] (23) = ( 1.019940e +06 ) sec^-1
93- EvtsPerSec[MatrixElems] (3) = ( 1.187870e +06 ) sec^-1
94- EvtsPerSec[MECalcOnly] (3a) = ( 1.187870e +06 ) sec^-1
86+ EvtsPerSec[Rmb+ME] (23) = ( 1.047604e +06 ) sec^-1
87+ EvtsPerSec[MatrixElems] (3) = ( 1.219439e +06 ) sec^-1
88+ EvtsPerSec[MECalcOnly] (3a) = ( 1.219439e +06 ) sec^-1
9589MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
96- TOTAL : 6.588033 sec
97- 19,038,044,386 cycles # 2.888 GHz
98- 46,485,585,356 instructions # 2.44 insn per cycle
99- 6.596061286 seconds time elapsed
90+ TOTAL : 6.411794 sec
91+ 19,014,240,782 cycles # 2.964 GHz
92+ 46,485,315,191 instructions # 2.44 insn per cycle
93+ 6.416861168 seconds time elapsed
10094=Symbols in CPPProcess_cpp.o= (~sse4: 482) (avx2: 0) (512y: 0) (512z: 0)
10195-------------------------------------------------------------------------
10296runTest /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
@@ -116,14 +110,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.5.0] [inlineHel=0
116110Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
117111FP precision = DOUBLE (NaN/abnormal=0, zero=0)
118112Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
119- EvtsPerSec[Rmb+ME] (23) = ( 1.557129e +06 ) sec^-1
120- EvtsPerSec[MatrixElems] (3) = ( 2.030035e +06 ) sec^-1
121- EvtsPerSec[MECalcOnly] (3a) = ( 2.030035e +06 ) sec^-1
113+ EvtsPerSec[Rmb+ME] (23) = ( 1.598686e +06 ) sec^-1
114+ EvtsPerSec[MatrixElems] (3) = ( 2.089132e +06 ) sec^-1
115+ EvtsPerSec[MECalcOnly] (3a) = ( 2.089132e +06 ) sec^-1
122116MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
123- TOTAL : 4.460811 sec
124- 12,939,620,485 cycles # 2.898 GHz
125- 31,810,901,247 instructions # 2.46 insn per cycle
126- 4.469139042 seconds time elapsed
117+ TOTAL : 4.346013 sec
118+ 12,961,637,078 cycles # 2.979 GHz
119+ 31,812,423,686 instructions # 2.45 insn per cycle
120+ 4.352494980 seconds time elapsed
127121=Symbols in CPPProcess_cpp.o= (~sse4: 1669) (avx2: 0) (512y: 0) (512z: 0)
128122-------------------------------------------------------------------------
129123runTest /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
@@ -143,14 +137,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.5.0] [inlineHel=0
143137Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
144138FP precision = DOUBLE (NaN/abnormal=0, zero=0)
145139Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
146- EvtsPerSec[Rmb+ME] (23) = ( 1.933537e +06 ) sec^-1
147- EvtsPerSec[MatrixElems] (3) = ( 2.681631e +06 ) sec^-1
148- EvtsPerSec[MECalcOnly] (3a) = ( 2.681631e +06 ) sec^-1
140+ EvtsPerSec[Rmb+ME] (23) = ( 1.995383e +06 ) sec^-1
141+ EvtsPerSec[MatrixElems] (3) = ( 2.769392e +06 ) sec^-1
142+ EvtsPerSec[MECalcOnly] (3a) = ( 2.769392e +06 ) sec^-1
149143MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
150- TOTAL : 3.671840 sec
151- 10,104,892,452 cycles # 2.749 GHz
152- 19,727,697,375 instructions # 1.95 insn per cycle
153- 3.679095535 seconds time elapsed
144+ TOTAL : 3.556154 sec
145+ 10,091,928,187 cycles # 2.835 GHz
146+ 19,729,979,199 instructions # 1.96 insn per cycle
147+ 3.561316676 seconds time elapsed
154148=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1917) (512y: 0) (512z: 0)
155149-------------------------------------------------------------------------
156150runTest /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
@@ -170,14 +164,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.5.0] [inlineHel=0
170164Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
171165FP precision = DOUBLE (NaN/abnormal=0, zero=0)
172166Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
173- EvtsPerSec[Rmb+ME] (23) = ( 1.989488e +06 ) sec^-1
174- EvtsPerSec[MatrixElems] (3) = ( 2.781185e +06 ) sec^-1
175- EvtsPerSec[MECalcOnly] (3a) = ( 2.781185e +06 ) sec^-1
167+ EvtsPerSec[Rmb+ME] (23) = ( 2.071101e +06 ) sec^-1
168+ EvtsPerSec[MatrixElems] (3) = ( 2.895575e +06 ) sec^-1
169+ EvtsPerSec[MECalcOnly] (3a) = ( 2.895575e +06 ) sec^-1
176170MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
177- TOTAL : 3.576826 sec
178- 9,900,381,139 cycles # 2.765 GHz
179- 19,380,047,753 instructions # 1.96 insn per cycle
180- 3.585735108 seconds time elapsed
171+ TOTAL : 3.437536 sec
172+ 9,847,578,789 cycles # 2.862 GHz
173+ 19,380,355,138 instructions # 1.97 insn per cycle
174+ 3.442447176 seconds time elapsed
181175=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 180) (512z: 0)
182176-------------------------------------------------------------------------
183177runTest /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
@@ -197,14 +191,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.5.0] [inlineHel=0
197191Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
198192FP precision = DOUBLE (NaN/abnormal=0, zero=0)
199193Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
200- EvtsPerSec[Rmb+ME] (23) = ( 1.671348e +06 ) sec^-1
201- EvtsPerSec[MatrixElems] (3) = ( 2.193135e +06 ) sec^-1
202- EvtsPerSec[MECalcOnly] (3a) = ( 2.193135e +06 ) sec^-1
194+ EvtsPerSec[Rmb+ME] (23) = ( 1.773261e +06 ) sec^-1
195+ EvtsPerSec[MatrixElems] (3) = ( 2.352997e +06 ) sec^-1
196+ EvtsPerSec[MECalcOnly] (3a) = ( 2.352997e +06 ) sec^-1
203197MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
204- TOTAL : 4.184170 sec
205- 8,626,596,296 cycles # 2.060 GHz
206- 15,802,085,882 instructions # 1.83 insn per cycle
207- 4.189889070 seconds time elapsed
198+ TOTAL : 3.952172 sec
199+ 8,636,738,592 cycles # 2.183 GHz
200+ 15,800,904,624 instructions # 1.83 insn per cycle
201+ 3.957156027 seconds time elapsed
208202=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 873) (512y: 156) (512z: 1263)
209203-------------------------------------------------------------------------
210204runTest /data/avalassi/GPU2025/test-madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
0 commit comments