-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconformer-k2-pruned-max-frames-30000.txt
394 lines (393 loc) · 97 KB
/
conformer-k2-pruned-max-frames-30000.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg CPU Mem Self CPU Mem CUDA Mem Self CUDA Mem # of Calls
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
aten::mm 0.80% 224.961ms 1.30% 366.698ms 71.066us 5.241s 36.34% 5.241s 1.016ms 0 b 0 b 96.80 Gb 96.80 Gb 5160
volta_sgemm_128x64_nt 0.00% 0.000us 0.00% 0.000us 0.000us 1.745s 12.10% 1.745s 1.026ms 0 b 0 b 0 b 0 b 1700
volta_sgemm_128x64_tn 0.00% 0.000us 0.00% 0.000us 0.000us 1.468s 10.18% 1.468s 965.539us 0 b 0 b 0 b 0 b 1520
volta_sgemm_128x64_nn 0.00% 0.000us 0.00% 0.000us 0.000us 1.466s 10.16% 1.466s 1.004ms 0 b 0 b 0 b 0 b 1460
aten::bmm 0.27% 76.206ms 0.54% 153.191ms 69.005us 1.462s 10.13% 1.462s 658.412us 0 b 0 b 115.64 Gb 0 b 2220
aten::mul 3.41% 962.524ms 5.80% 1.637s 48.900us 1.329s 9.21% 1.329s 39.704us 0 b 0 b 301.17 Gb 301.17 Gb 33472
volta_sgemm_128x32_nn 0.00% 0.000us 0.00% 0.000us 0.000us 875.826ms 6.07% 875.826ms 1.095ms 0 b 0 b 0 b 0 b 800
aten::cudnn_convolution_backward_weight 0.17% 47.404ms 0.38% 107.437ms 198.957us 873.087ms 6.05% 873.087ms 1.617ms 0 b 0 b 763.32 Mb -1.17 Mb 540
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 862.160ms 5.98% 862.160ms 55.409us 0 b 0 b 0 b 0 b 15560
aten::copy_ 0.56% 157.456ms 6.30% 1.777s 174.231us 712.349ms 4.94% 712.522ms 69.855us 0 b -1.10 Mb 0 b 0 b 10200
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 650.314ms 4.51% 650.314ms 93.719us 0 b 0 b 0 b 0 b 6939
aten::add 0.88% 248.212ms 1.43% 402.188ms 51.922us 640.737ms 4.44% 640.737ms 82.718us 0 b 0 b 147.25 Gb 147.25 Gb 7746
aten::sub 0.82% 230.405ms 1.44% 406.494ms 51.717us 629.486ms 4.36% 629.486ms 80.087us 0 b 0 b 158.10 Gb 158.10 Gb 7860
volta_scudnn_128x128_stridedB_splitK_interior_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 493.426ms 3.42% 493.426ms 2.056ms 0 b 0 b 0 b 0 b 240
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 440.839ms 3.06% 440.839ms 27.035us 0 b 0 b 0 b 0 b 16306
aten::sum 2.11% 595.390ms 2.96% 833.929ms 84.065us 421.858ms 2.92% 421.858ms 42.526us 0 b 0 b 4.28 Gb 4.28 Gb 9920
aten::cudnn_convolution_backward_input 0.17% 48.288ms 0.31% 88.640ms 164.148us 411.452ms 2.85% 411.452ms 761.948us 0 b 0 b 9.47 Gb 9.46 Gb 540
volta_sgemm_32x128_tn 0.00% 0.000us 0.00% 0.000us 0.000us 364.476ms 2.53% 364.476ms 455.595us 0 b 0 b 0 b 0 b 800
aten::cudnn_convolution 0.38% 106.323ms 0.47% 133.606ms 247.419us 323.937ms 2.25% 323.937ms 599.883us 0 b 0 b 13.88 Gb 13.85 Gb 540
volta_sgemm_128x128_tn 0.00% 0.000us 0.00% 0.000us 0.000us 315.693ms 2.19% 315.693ms 1.578ms 0 b 0 b 0 b 0 b 200
volta_scudnn_128x64_relu_interior_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 309.938ms 2.15% 309.938ms 619.876us 0 b 0 b 0 b 0 b 500
void at::native::reduce_kernel<128, 4, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 304.814ms 2.11% 304.814ms 101.200us 0 b 0 b 0 b 0 b 3012
volta_scudnn_128x64_stridedB_splitK_interior_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 301.367ms 2.09% 301.367ms 1.256ms 0 b 0 b 0 b 0 b 240
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 296.985ms 2.06% 296.985ms 87.865us 0 b 0 b 0 b 0 b 3380
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 280.235ms 1.94% 280.235ms 21.459us 0 b 0 b 0 b 0 b 13059
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 274.567ms 1.90% 274.567ms 80.283us 0 b 0 b 0 b 0 b 3420
volta_scudnn_128x64_stridedB_interior_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 274.240ms 1.90% 274.240ms 571.333us 0 b 0 b 0 b 0 b 480
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 265.445ms 1.84% 265.445ms 51.845us 0 b 0 b 0 b 0 b 5120
volta_sgemm_128x32_nt 0.00% 0.000us 0.00% 0.000us 0.000us 263.396ms 1.83% 263.396ms 1.097ms 0 b 0 b 0 b 0 b 240
aten::abs 0.88% 249.259ms 1.97% 556.858ms 107.918us 260.949ms 1.81% 521.898ms 101.143us 0 b 0 b 174.99 Gb 0 b 5160
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 260.949ms 1.81% 260.949ms 101.221us 0 b 0 b 0 b 0 b 2578
aten::fill_ 0.46% 128.538ms 2.98% 840.693ms 65.170us 234.959ms 1.63% 234.959ms 18.214us 0 b 0 b 0 b 0 b 12900
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 234.831ms 1.63% 234.831ms 18.789us 0 b 0 b 0 b 0 b 12498
aten::add_ 0.69% 193.597ms 1.48% 418.277ms 31.592us 234.192ms 1.62% 234.192ms 17.688us 0 b 0 b 0 b 0 b 13240
aten::_conv_depthwise2d_backward 0.04% 11.424ms 0.11% 29.857ms 114.835us 230.600ms 1.60% 230.600ms 886.923us 0 b 0 b 3.37 Gb 0 b 260
aten::mean 0.58% 163.851ms 0.79% 224.188ms 78.939us 193.948ms 1.34% 193.948ms 68.292us 0 b 0 b 18.61 Mb 18.61 Mb 2840
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 187.460ms 1.30% 187.460ms 38.328us 0 b 0 b 0 b 0 b 4891
Memcpy DtoD (Device -> Device) 0.00% 0.000us 0.00% 0.000us 0.000us 170.750ms 1.18% 170.750ms 139.959us 0 b 0 b 0 b 0 b 1220
aten::_masked_scale 0.12% 33.364ms 0.42% 118.411ms 81.103us 170.215ms 1.18% 170.215ms 116.586us 0 b 0 b 39.73 Gb 0 b 1460
void at::native::(anonymous namespace)::conv_depthwi... 0.00% 0.000us 0.00% 0.000us 0.000us 161.373ms 1.12% 161.373ms 620.665us 0 b 0 b 0 b 0 b 260
aten::_fused_dropout 0.51% 143.170ms 1.25% 352.453ms 238.144us 160.717ms 1.11% 160.717ms 108.593us 0 b 0 b 49.75 Gb 0 b 1480
void at::native::(anonymous namespace)::fused_dropou... 0.00% 0.000us 0.00% 0.000us 0.000us 160.717ms 1.11% 160.717ms 108.593us 0 b 0 b 0 b 0 b 1480
aten::masked_fill_ 0.07% 20.853ms 0.15% 41.130ms 82.260us 145.390ms 1.01% 145.390ms 290.780us 0 b 0 b 0 b 0 b 500
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 145.364ms 1.01% 145.364ms 302.842us 0 b 0 b 0 b 0 b 480
aten::_softmax 0.03% 8.395ms 0.05% 12.705ms 52.938us 130.369ms 0.90% 130.369ms 543.204us 0 b 0 b 23.06 Gb 23.06 Gb 240
void (anonymous namespace)::softmax_warp_forward<flo... 0.00% 0.000us 0.00% 0.000us 0.000us 130.369ms 0.90% 130.369ms 543.204us 0 b 0 b 0 b 0 b 240
aten::_softmax_backward_data 0.04% 10.611ms 0.09% 24.415ms 101.729us 118.853ms 0.82% 240.176ms 1.001ms 0 b 0 b 23.06 Gb -13.54 Mb 240
void (anonymous namespace)::softmax_warp_backward<fl... 0.00% 0.000us 0.00% 0.000us 0.000us 118.853ms 0.82% 118.853ms 495.221us 0 b 0 b 0 b 0 b 240
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 115.527ms 0.80% 115.527ms 56.081us 0 b 0 b 0 b 0 b 2060
void at::native::reduce_kernel<128, 4, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 111.955ms 0.78% 111.955ms 74.637us 0 b 0 b 0 b 0 b 1500
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 110.072ms 0.76% 110.072ms 15.983us 0 b 0 b 0 b 0 b 6887
aten::gt 0.46% 131.081ms 0.61% 173.104ms 67.095us 107.188ms 0.74% 107.188ms 41.546us 0 b 0 b 11.02 Gb 11.02 Gb 2580
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 107.155ms 0.74% 107.155ms 41.857us 0 b 0 b 0 b 0 b 2560
volta_sgemm_64x64_nt 0.00% 0.000us 0.00% 0.000us 0.000us 96.158ms 0.67% 96.158ms 343.421us 0 b 0 b 0 b 0 b 280
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 95.004ms 0.66% 95.004ms 190.008us 0 b 0 b 0 b 0 b 500
aten::sigmoid 0.07% 20.652ms 0.12% 32.475ms 41.635us 90.546ms 0.63% 90.546ms 116.085us 0 b 0 b 33.60 Gb 33.60 Gb 780
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 90.546ms 0.63% 90.546ms 116.085us 0 b 0 b 0 b 0 b 780
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 79.561ms 0.55% 79.561ms 59.374us 0 b 0 b 0 b 0 b 1340
void cudnn::detail::dgrad2d_alg1_1<float, 0, 6, 6, 5... 0.00% 0.000us 0.00% 0.000us 0.000us 78.237ms 0.54% 78.237ms 3.912ms 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 75.211ms 0.52% 75.211ms 78.345us 0 b 0 b 0 b 0 b 960
void at::native::(anonymous namespace)::conv_depthwi... 0.00% 0.000us 0.00% 0.000us 0.000us 69.227ms 0.48% 69.227ms 266.258us 0 b 0 b 0 b 0 b 260
aten::_cat 0.06% 16.639ms 0.12% 33.835ms 80.560us 50.086ms 0.35% 50.086ms 119.252us 0 b 0 b 10.32 Gb 0 b 420
void at::native::(anonymous namespace)::CatArrayBatc... 0.00% 0.000us 0.00% 0.000us 0.000us 48.605ms 0.34% 48.605ms 186.942us 0 b 0 b 0 b 0 b 260
void cudnn::detail::wgrad_alg0_engine<float, 512, 6,... 0.00% 0.000us 0.00% 0.000us 0.000us 43.878ms 0.30% 43.878ms 1.097ms 0 b 0 b 0 b 0 b 40
volta_sgemm_64x32_sliced1x4_nt 0.00% 0.000us 0.00% 0.000us 0.000us 43.719ms 0.30% 43.719ms 728.650us 0 b 0 b 0 b 0 b 60
aten::_conv_depthwise2d 0.09% 24.284ms 0.13% 35.985ms 138.404us 42.331ms 0.29% 42.331ms 162.812us 0 b 0 b 3.35 Gb 0 b 260
void at::native::(anonymous namespace)::conv_depthwi... 0.00% 0.000us 0.00% 0.000us 0.000us 42.331ms 0.29% 42.331ms 162.812us 0 b 0 b 0 b 0 b 260
volta_sgemm_128x128_nt 0.00% 0.000us 0.00% 0.000us 0.000us 38.279ms 0.27% 38.279ms 1.914ms 0 b 0 b 0 b 0 b 20
void cudnn::detail::wgrad_alg0_engine<float, 128, 6,... 0.00% 0.000us 0.00% 0.000us 0.000us 30.528ms 0.21% 30.528ms 1.526ms 0 b 0 b 0 b 0 b 20
MutualInformationRecursionFunction 0.12% 33.908ms 0.33% 92.553ms 2.314ms 29.231ms 0.20% 29.941ms 748.525us 0 b -2.50 Kb 241.15 Mb -246.98 Mb 40
void cudnn::detail::dgrad_engine<float, 128, 6, 8, 3... 0.00% 0.000us 0.00% 0.000us 0.000us 26.526ms 0.18% 26.526ms 1.263ms 0 b 0 b 0 b 0 b 21
aten::glu_backward 0.03% 7.239ms 0.06% 16.710ms 69.625us 24.153ms 0.17% 24.153ms 100.638us 0 b 0 b 6.58 Gb 0 b 240
void at::native::glu_backward_kernel<float, OffsetCa... 0.00% 0.000us 0.00% 0.000us 0.000us 24.153ms 0.17% 24.153ms 100.638us 0 b 0 b 0 b 0 b 240
void cudnn::detail::dgrad_engine<float, 128, 6, 7, 3... 0.00% 0.000us 0.00% 0.000us 0.000us 23.405ms 0.16% 23.405ms 1.232ms 0 b 0 b 0 b 0 b 19
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 22.212ms 0.15% 22.212ms 3.843us 0 b 0 b 0 b 0 b 5780
aten::div 0.10% 28.399ms 0.14% 39.290ms 57.779us 21.884ms 0.15% 21.884ms 32.182us 0 b 0 b 6.91 Gb 6.91 Gb 680
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 19.894ms 0.14% 19.894ms 38.258us 0 b 0 b 0 b 0 b 520
aten::scatter_add_ 0.02% 4.524ms 0.03% 7.074ms 50.529us 18.360ms 0.13% 18.360ms 131.143us 0 b 0 b 0 b 0 b 140
void at::native::_scatter_gather_elementwise_kernel<... 0.00% 0.000us 0.00% 0.000us 0.000us 18.360ms 0.13% 18.360ms 131.143us 0 b 0 b 0 b 0 b 140
aten::exp 0.71% 200.415ms 1.07% 300.934ms 52.245us 16.557ms 0.11% 16.557ms 2.874us 0 b 0 b 1.68 Gb 1.68 Gb 5760
Memset (Device) 0.00% 0.000us 0.00% 0.000us 0.000us 15.962ms 0.11% 15.962ms 1.503us 0 b 0 b 0 b 0 b 10620
volta_sgemm_128x32_sliced1x4_nn 0.00% 0.000us 0.00% 0.000us 0.000us 15.728ms 0.11% 15.728ms 65.533us 0 b 0 b 0 b 0 b 240
void k2::mutual_information_kernel<float, 32>(at::Ge... 0.00% 0.000us 0.00% 0.000us 0.000us 15.205ms 0.11% 15.205ms 21.177us 0 b 0 b 0 b 0 b 718
aten::glu 0.09% 25.315ms 0.23% 63.600ms 265.000us 14.839ms 0.10% 14.839ms 61.829us 0 b 0 b 3.30 Gb 3.30 Gb 240
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 14.839ms 0.10% 14.839ms 61.829us 0 b 0 b 0 b 0 b 240
void k2::mutual_information_backward_kernel<float, 3... 0.00% 0.000us 0.00% 0.000us 0.000us 14.026ms 0.10% 14.026ms 19.535us 0 b 0 b 0 b 0 b 718
aten::equal 0.23% 65.726ms 1.24% 348.747ms 726.556us 13.326ms 0.09% 21.480ms 44.750us 0 b 0 b 0 b -240.00 Kb 480
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 13.326ms 0.09% 13.326ms 27.762us 0 b 0 b 0 b 0 b 480
aten::tanh_backward 0.00% 642.000us 0.00% 993.000us 49.650us 11.839ms 0.08% 11.839ms 591.950us 0 b 0 b 1.37 Gb 1.37 Gb 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 11.839ms 0.08% 11.839ms 591.950us 0 b 0 b 0 b 0 b 20
aten::pow 0.27% 75.101ms 0.37% 103.612ms 99.627us 11.585ms 0.08% 24.055ms 23.130us 0 b 0 b 7.14 Gb 7.14 Gb 1040
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 9.438ms 0.07% 9.438ms 36.300us 0 b 0 b 0 b 0 b 260
void scalePackedTensor_kernel<float, float>(cudnnTen... 0.00% 0.000us 0.00% 0.000us 0.000us 9.234ms 0.06% 9.234ms 17.100us 0 b 0 b 0 b 0 b 540
aten::all 0.18% 52.160ms 0.23% 65.251ms 130.502us 7.559ms 0.05% 7.559ms 15.118us 0 b 0 b 250.00 Kb 250.00 Kb 500
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 6.890ms 0.05% 6.890ms 13.780us 0 b 0 b 0 b 0 b 500
volta_scudnn_128x32_relu_small_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 6.603ms 0.05% 6.603ms 330.150us 0 b 0 b 0 b 0 b 20
volta_scudnn_128x32_relu_interior_nn_v1 0.00% 0.000us 0.00% 0.000us 0.000us 6.356ms 0.04% 6.356ms 317.800us 0 b 0 b 0 b 0 b 20
aten::exp_ 0.00% 240.000us 0.00% 478.000us 23.900us 5.655ms 0.04% 5.655ms 282.750us 0 b 0 b 0 b 0 b 20
aten::gather 0.05% 12.794ms 0.06% 16.134ms 100.838us 5.229ms 0.04% 5.229ms 32.681us 0 b 0 b 1.56 Gb 1.56 Gb 160
void at::native::_scatter_gather_elementwise_kernel<... 0.00% 0.000us 0.00% 0.000us 0.000us 5.122ms 0.04% 5.122ms 36.586us 0 b 0 b 0 b 0 b 140
volta_sgemm_32x128_nn 0.00% 0.000us 0.00% 0.000us 0.000us 3.966ms 0.03% 3.966ms 99.150us 0 b 0 b 0 b 0 b 40
aten::tanh 0.00% 804.000us 0.00% 1.210ms 60.500us 3.644ms 0.03% 3.644ms 182.200us 0 b 0 b 1.37 Gb 1.37 Gb 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 3.644ms 0.03% 3.644ms 182.200us 0 b 0 b 0 b 0 b 20
aten::clamp_min 0.94% 265.518ms 1.84% 518.418ms 123.433us 3.230ms 0.02% 6.460ms 1.538us 0 b 0 b 155.84 Mb 0 b 4200
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 3.230ms 0.02% 3.230ms 1.538us 0 b 0 b 0 b 0 b 2100
aten::amax 0.01% 1.802ms 0.02% 4.402ms 110.050us 2.539ms 0.02% 5.078ms 126.950us 0 b 0 b 5.49 Mb 0 b 40
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 2.539ms 0.02% 2.539ms 126.950us 0 b 0 b 0 b 0 b 20
rnnt_loss_pruned 0.43% 122.602ms 1.53% 432.027ms 21.601ms 2.297ms 0.02% 100.623ms 5.031ms -80 b -12.89 Kb 5.03 Gb -2.59 Gb 20
cudnn::gemm::computeOffsetsKernel(cudnn::gemm::Compu... 0.00% 0.000us 0.00% 0.000us 0.000us 2.011ms 0.01% 2.011ms 1.972us 0 b 0 b 0 b 0 b 1020
volta_sgemm_128x32_tn 0.00% 0.000us 0.00% 0.000us 0.000us 1.992ms 0.01% 1.992ms 99.600us 0 b 0 b 0 b 0 b 20
aten::neg 0.03% 9.071ms 0.05% 14.267ms 41.962us 1.947ms 0.01% 1.947ms 5.726us 0 b 0 b 602.59 Mb 602.59 Mb 340
aten::lt 0.21% 58.921ms 0.28% 79.356ms 60.118us 1.850ms 0.01% 1.850ms 1.402us 0 b 0 b 1.73 Mb 1.73 Mb 1320
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 1.715ms 0.01% 1.715ms 1.340us 0 b 0 b 0 b 0 b 1280
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 1.684ms 0.01% 1.684ms 6.477us 0 b 0 b 0 b 0 b 260
void k2::cub::DeviceScanKernel<k2::cub::DeviceScanPo... 0.00% 0.000us 0.00% 0.000us 0.000us 1.608ms 0.01% 1.608ms 2.513us 0 b 0 b 0 b 0 b 640
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 1.579ms 0.01% 1.579ms 6.073us 0 b 0 b 0 b 0 b 260
aten::addmv_ 0.00% 831.000us 0.01% 1.669ms 41.725us 1.502ms 0.01% 1.502ms 37.550us 0 b 0 b 0 b 0 b 40
aten::embedding_dense_backward 0.00% 887.000us 0.01% 4.032ms 201.600us 1.491ms 0.01% 1.964ms 98.200us 0 b 0 b 19.53 Mb -68.28 Mb 20
void at::native::(anonymous namespace)::embedding_ba... 0.00% 0.000us 0.00% 0.000us 0.000us 1.491ms 0.01% 1.491ms 74.550us 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 1.458ms 0.01% 1.458ms 18.225us 0 b 0 b 0 b 0 b 80
aten::max 0.04% 10.029ms 0.06% 15.568ms 155.680us 1.383ms 0.01% 1.383ms 13.830us 0 b 0 b 2.10 Mb 0 b 100
void at::native::(anonymous namespace)::CatArrayBatc... 0.00% 0.000us 0.00% 0.000us 0.000us 1.377ms 0.01% 1.377ms 11.475us 0 b 0 b 0 b 0 b 120
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 1.000ms 0.01% 1.000ms 25.000us 0 b 0 b 0 b 0 b 40
Memcpy DtoH (Device -> Pageable) 0.00% 0.000us 0.00% 0.000us 0.000us 997.000us 0.01% 997.000us 1.608us 0 b 0 b 0 b 0 b 620
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 987.000us 0.01% 987.000us 9.870us 0 b 0 b 0 b 0 b 100
aten::cumsum 0.00% 1.087ms 0.01% 1.477ms 73.850us 949.000us 0.01% 949.000us 47.450us 0 b 0 b 63.13 Mb 63.13 Mb 20
void at::native::tensor_kernel_scan_outer_dim<float,... 0.00% 0.000us 0.00% 0.000us 0.000us 949.000us 0.01% 949.000us 47.450us 0 b 0 b 0 b 0 b 20
aten::remainder 0.01% 2.946ms 0.01% 3.644ms 91.100us 934.000us 0.01% 934.000us 23.350us 0 b 0 b 238.35 Mb 238.35 Mb 40
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 934.000us 0.01% 934.000us 23.350us 0 b 0 b 0 b 0 b 40
void gemv2N_kernel<int, int, float, float, float, 12... 0.00% 0.000us 0.00% 0.000us 0.000us 889.000us 0.01% 889.000us 44.450us 0 b 0 b 0 b 0 b 20
aten::_local_scalar_dense 0.02% 5.156ms 0.76% 213.047ms 266.309us 871.000us 0.01% 871.000us 1.089us 0 b 0 b 0 b 0 b 800
aten::mul_ 0.00% 1.304ms 0.01% 2.512ms 31.400us 854.000us 0.01% 854.000us 10.675us 0 b 0 b 0 b 0 b 80
void at::native::_scatter_gather_elementwise_kernel<... 0.00% 0.000us 0.00% 0.000us 0.000us 829.000us 0.01% 829.000us 10.363us 0 b 0 b 0 b 0 b 80
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 737.000us 0.01% 737.000us 7.370us 0 b 0 b 0 b 0 b 100
void k2::cub::DeviceScanInitKernel<k2::cub::ScanTile... 0.00% 0.000us 0.00% 0.000us 0.000us 689.000us 0.00% 689.000us 1.077us 0 b 0 b 0 b 0 b 640
aten::scatter_ 0.01% 3.348ms 0.02% 4.875ms 60.938us 660.000us 0.00% 660.000us 8.250us 0 b 0 b 0 b 0 b 80
cudnn::gemm::computeWgradSplitKOffsetsKernel(cudnn::... 0.00% 0.000us 0.00% 0.000us 0.000us 657.000us 0.00% 657.000us 1.369us 0 b 0 b 0 b 0 b 480
cudnn::gemm::computeWgradBOffsetsKernel(cudnn::gemm:... 0.00% 0.000us 0.00% 0.000us 0.000us 653.000us 0.00% 653.000us 1.360us 0 b 0 b 0 b 0 b 480
cudnn::gemm::computeBOffsetsKernel(cudnn::gemm::Comp... 0.00% 0.000us 0.00% 0.000us 0.000us 615.000us 0.00% 615.000us 1.281us 0 b 0 b 0 b 0 b 480
void gemv2T_kernel_val<int, int, float, float, float... 0.00% 0.000us 0.00% 0.000us 0.000us 613.000us 0.00% 613.000us 30.650us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 568.000us 0.00% 568.000us 2.185us 0 b 0 b 0 b 0 b 260
aten::sub_ 0.00% 1.056ms 0.01% 1.680ms 42.000us 464.000us 0.00% 464.000us 11.600us 0 b 0 b 0 b 0 b 40
aten::threshold_backward 0.00% 860.000us 0.00% 1.279ms 63.950us 446.000us 0.00% 446.000us 22.300us 0 b 0 b 67.91 Mb 67.91 Mb 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 446.000us 0.00% 446.000us 22.300us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 445.000us 0.00% 445.000us 5.562us 0 b 0 b 0 b 0 b 80
Memcpy HtoD (Pageable -> Device) 0.00% 0.000us 0.00% 0.000us 0.000us 434.000us 0.00% 434.000us 3.100us 0 b 0 b 0 b 0 b 140
void at::native::reduce_kernel<256, 2, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 430.000us 0.00% 430.000us 21.500us 0 b 0 b 0 b 0 b 20
aten::argmax 0.00% 1.273ms 0.01% 1.685ms 84.250us 410.000us 0.00% 410.000us 20.500us 0 b 0 b 1.10 Mb 1.10 Mb 20
aten::scatter 0.01% 1.935ms 0.02% 4.573ms 114.325us 405.000us 0.00% 942.000us 23.550us 0 b 0 b 120.16 Mb 120.16 Mb 40
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 345.000us 0.00% 345.000us 3.450us 0 b 0 b 0 b 0 b 100
aten::log 0.01% 2.300ms 0.01% 3.551ms 44.388us 279.000us 0.00% 279.000us 3.487us 0 b 0 b 62.09 Mb 62.09 Mb 80
aten::uniform_ 0.01% 2.116ms 0.01% 2.611ms 10.042us 278.000us 0.00% 278.000us 1.069us 0 b 0 b 0 b 0 b 260
_ZN2at6native89_GLOBAL__N__65_tmpxft_00008f3f_000000... 0.00% 0.000us 0.00% 0.000us 0.000us 278.000us 0.00% 278.000us 13.900us 0 b 0 b 0 b 0 b 20
aten::index_select 0.01% 1.532ms 0.01% 2.431ms 121.550us 255.000us 0.00% 255.000us 12.750us 0 b 0 b 67.91 Mb 0 b 20
void at::native::(anonymous namespace)::indexSelectL... 0.00% 0.000us 0.00% 0.000us 0.000us 255.000us 0.00% 255.000us 12.750us 0 b 0 b 0 b 0 b 20
void at::native::_scatter_gather_elementwise_kernel<... 0.00% 0.000us 0.00% 0.000us 0.000us 236.000us 0.00% 236.000us 5.900us 0 b 0 b 0 b 0 b 40
aten::_s_where 0.01% 2.245ms 0.02% 4.281ms 71.350us 220.000us 0.00% 220.000us 3.667us 0 b 0 b 2.21 Mb 0 b 60
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 220.000us 0.00% 220.000us 3.667us 0 b 0 b 0 b 0 b 60
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 220.000us 0.00% 220.000us 18.333us 0 b 0 b 0 b 0 b 12
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 205.000us 0.00% 205.000us 5.125us 0 b 0 b 0 b 0 b 40
aten::arange 0.04% 10.406ms 0.12% 33.181ms 118.504us 201.000us 0.00% 402.000us 1.436us 140.28 Kb 0 b 556.00 Kb 0 b 280
void (anonymous namespace)::elementwise_kernel_with_... 0.00% 0.000us 0.00% 0.000us 0.000us 201.000us 0.00% 201.000us 1.675us 0 b 0 b 0 b 0 b 120
void at::native::reduce_kernel<512, 1, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 193.000us 0.00% 193.000us 4.825us 0 b 0 b 0 b 0 b 40
void at::native::reduce_kernel<256, 2, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 190.000us 0.00% 190.000us 9.500us 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 186.000us 0.00% 186.000us 3.100us 0 b 0 b 0 b 0 b 60
aten::ge 0.01% 1.658ms 0.01% 2.484ms 62.100us 147.000us 0.00% 147.000us 3.675us 0 b 0 b 156.00 Kb 156.00 Kb 40
void at::native::reduce_kernel<256, 2, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 145.000us 0.00% 145.000us 24.167us 0 b 0 b 0 b 0 b 6
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 133.000us 0.00% 133.000us 2.217us 0 b 0 b 0 b 0 b 60
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 128.000us 0.00% 128.000us 1.600us 0 b 0 b 0 b 0 b 80
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 110.000us 0.00% 110.000us 5.500us 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 107.000us 0.00% 107.000us 5.350us 0 b 0 b 0 b 0 b 20
void at::native::_scatter_gather_elementwise_kernel<... 0.00% 0.000us 0.00% 0.000us 0.000us 107.000us 0.00% 107.000us 5.350us 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 104.000us 0.00% 104.000us 2.600us 0 b 0 b 0 b 0 b 40
void at::native::(anonymous namespace)::CatArrayBatc... 0.00% 0.000us 0.00% 0.000us 0.000us 104.000us 0.00% 104.000us 2.600us 0 b 0 b 0 b 0 b 40
aten::__rshift__ 0.01% 3.205ms 0.02% 4.563ms 114.075us 101.000us 0.00% 101.000us 2.525us 0 b 0 b 20.00 Kb 0 b 40
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 101.000us 0.00% 101.000us 2.525us 0 b 0 b 0 b 0 b 40
aten::random_ 0.00% 429.000us 0.00% 843.000us 42.150us 99.000us 0.00% 99.000us 4.950us 0 b 0 b 0 b 0 b 20
void at::native::(anonymous namespace)::distribution... 0.00% 0.000us 0.00% 0.000us 0.000us 99.000us 0.00% 99.000us 4.950us 0 b 0 b 0 b 0 b 20
void at::native::unrolled_elementwise_kernel<at::nat... 0.00% 0.000us 0.00% 0.000us 0.000us 94.000us 0.00% 94.000us 2.350us 0 b 0 b 0 b 0 b 40
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 72.000us 0.00% 72.000us 1.800us 0 b 0 b 0 b 0 b 40
aten::log_ 0.00% 249.000us 0.00% 522.000us 26.100us 66.000us 0.00% 66.000us 3.300us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 58.000us 0.00% 58.000us 1.450us 0 b 0 b 0 b 0 b 40
void at::native::reduce_kernel<128, 4, at::native::R... 0.00% 0.000us 0.00% 0.000us 0.000us 45.000us 0.00% 45.000us 22.500us 0 b 0 b 0 b 0 b 2
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 40.000us 0.00% 40.000us 2.000us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 33.000us 0.00% 33.000us 1.650us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 26.000us 0.00% 26.000us 1.300us 0 b 0 b 0 b 0 b 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 25.000us 0.00% 25.000us 1.250us 0 b 0 b 0 b 0 b 20
aten::eq 0.00% 999.000us 0.00% 1.343ms 67.150us 25.000us 0.00% 25.000us 1.250us 0 b 0 b 706.00 Kb 706.00 Kb 20
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 25.000us 0.00% 25.000us 1.250us 0 b 0 b 0 b 0 b 20
aten::zeros 0.34% 94.733ms 0.63% 177.161ms 70.864us 0.000us 0.00% 216.502ms 86.601us 5.16 Kb 0 b 114.06 Gb 0 b 2500
aten::empty 0.94% 263.836ms 0.94% 263.836ms 13.023us 0.000us 0.00% 0.000us 0.000us 1.13 Mb 1.13 Mb 335.51 Gb 335.51 Gb 20260
aten::zero_ 0.78% 219.111ms 3.73% 1.053s 76.958us 0.000us 0.00% 234.328ms 17.129us 0 b 0 b 0 b 0 b 13680
ProfilerStep* 28.94% 8.163s 74.29% 20.956s 1.048s 0.000us 0.00% 4.529s 226.464ms -80 b -6.06 Kb -140.50 Kb -223.75 Gb 20
aten::to 0.35% 97.902ms 7.17% 2.022s 293.019us 0.000us 0.00% 266.191ms 38.578us 20.16 Kb 0 b 87.50 Gb 0 b 6900
aten::_to_copy 0.78% 219.796ms 6.82% 1.924s 357.608us 0.000us 0.00% 266.191ms 49.478us 20.16 Kb 0 b 87.50 Gb 0 b 5380
aten::empty_strided 0.70% 197.213ms 0.70% 197.213ms 22.564us 0.000us 0.00% 0.000us 0.000us 20.16 Kb 20.16 Kb 160.80 Gb 160.80 Gb 8740
cudaMemcpyAsync 0.94% 265.155ms 0.94% 265.155ms 133.917us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1980
cudaStreamSynchronize 4.80% 1.355s 4.80% 1.355s 1.783ms 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 760
aten::detach_ 0.00% 624.000us 0.00% 705.000us 17.625us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 40
detach_ 0.00% 81.000us 0.00% 81.000us 2.025us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 40
aten::resize_ 0.38% 108.474ms 0.38% 108.474ms 11.206us 0.000us 0.00% 0.000us 0.000us 70.14 Kb 70.14 Kb 111.27 Gb 111.27 Gb 9680
aten::as_strided 0.25% 69.109ms 0.25% 69.109ms 1.903us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 36320
cudaLaunchKernel 10.65% 3.003s 10.65% 3.003s 22.095us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 135934
aten::resolve_conj 0.00% 70.000us 0.00% 70.000us 1.167us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 60
aten::resolve_neg 0.00% 38.000us 0.00% 38.000us 0.633us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 60
aten::rand 0.07% 20.167ms 0.09% 25.328ms 97.415us 0.000us 0.00% 278.000us 1.069us 960 b 0 b 175.80 Mb 0 b 260
aten::slice 0.25% 69.830ms 0.26% 74.156ms 26.675us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2780
aten::select 0.05% 14.084ms 0.05% 15.001ms 20.835us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 720
aten::contiguous 0.22% 62.114ms 1.42% 400.499ms 192.548us 0.000us 0.00% 105.538ms 50.739us 1.10 Mb 0 b 27.87 Gb 0 b 2080
aten::clone 0.67% 188.917ms 1.74% 489.520ms 143.976us 0.000us 0.00% 309.068ms 90.902us 1.10 Mb 0 b 87.47 Gb 0 b 3400
aten::empty_like 0.78% 221.340ms 1.34% 378.224ms 49.377us 0.000us 0.00% 0.000us 0.000us 1.10 Mb 0 b 154.00 Gb 0 b 7660
aten::randint 0.00% 721.000us 0.01% 1.959ms 97.950us 0.000us 0.00% 99.000us 4.950us 0 b 0 b 140.00 Kb 0 b 20
rnnt_encoder 0.14% 39.330ms 34.37% 9.696s 484.802ms 0.000us 0.00% 4.369s 218.452ms 2.00 Kb -70.53 Kb 217.45 Gb -562.27 Mb 20
rnnt_encoder_embed 0.15% 41.731ms 1.94% 546.194ms 27.310ms 0.000us 0.00% 167.920ms 8.396ms -80 b -400 b 11.06 Gb -4.29 Gb 20
aten::unsqueeze 0.33% 92.627ms 0.35% 98.498ms 32.189us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 3060
aten::conv2d 0.01% 2.562ms 0.12% 34.847ms 580.783us 0.000us 0.00% 42.618ms 710.300us 0 b 0 b 4.01 Gb 0 b 60
aten::convolution 0.12% 33.120ms 2.03% 571.886ms 714.857us 0.000us 0.00% 425.194ms 531.492us 0 b 0 b 20.52 Gb 0 b 800
aten::_convolution 0.61% 171.697ms 1.91% 538.766ms 673.457us 0.000us 0.00% 425.194ms 531.492us 0 b 0 b 20.52 Gb 0 b 800
cudaEventRecord 0.04% 10.388ms 0.04% 10.388ms 2.103us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 4940
aten::reshape 0.45% 127.007ms 0.93% 263.603ms 28.043us 0.000us 0.00% 60.386ms 6.424us 0 b 0 b 13.15 Gb 0 b 9400
aten::_reshape_alias 0.13% 37.616ms 0.13% 37.616ms 4.334us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 8680
ActivationBalancerFunction 3.85% 1.087s 10.74% 3.029s 2.367ms 0.000us 0.00% 553.043ms 432.065us 0 b 0 b 11.02 Gb -87.53 Gb 1280
cudaMemsetAsync 0.51% 143.253ms 0.51% 143.253ms 13.489us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 10620
aten::rsub 0.41% 114.504ms 0.79% 224.099ms 108.786us 0.000us 0.00% 115.527ms 56.081us 0 b 0 b 33.61 Gb 0 b 2060
aten::relu 0.31% 88.182ms 1.54% 433.161ms 206.267us 0.000us 0.00% 3.230ms 1.538us 0 b 0 b 77.92 Mb 0 b 2100
aten::view_as 0.94% 264.408ms 0.97% 272.389ms 212.804us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1280
aten::view 0.20% 57.243ms 0.20% 57.243ms 5.380us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 10640
DoubleSwishFunction 0.66% 186.217ms 1.14% 321.678ms 412.408us 0.000us 0.00% 364.905ms 467.827us 0 b 0 b 67.21 Gb -33.60 Gb 780
aten::detach 0.10% 28.217ms 0.11% 31.364ms 40.210us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 780
detach 0.01% 3.147ms 0.01% 3.147ms 4.035us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 780
aten::transpose 0.76% 215.709ms 0.86% 241.903ms 17.555us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 13780
aten::linear 1.84% 517.984ms 4.88% 1.377s 764.727us 0.000us 0.00% 1.817s 1.010ms 0 b 0 b 58.84 Gb 0 b 1800
aten::t 0.45% 125.917ms 0.92% 258.402ms 29.431us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 8780
aten::matmul 1.27% 356.905ms 3.41% 961.528ms 418.056us 0.000us 0.00% 1.933s 840.408us 0 b 0 b 134.63 Gb 0 b 2300
cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 0.05% 14.582ms 0.05% 14.582ms 2.054us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 7100
cudaEventQuery 0.04% 10.760ms 0.04% 10.760ms 3.241us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 3320
aten::_unsafe_view 0.39% 110.205ms 0.42% 118.747ms 39.320us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 3020
aten::result_type 0.00% 1.153ms 0.00% 1.153ms 1.109us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1040
aten::dropout 0.20% 56.223ms 1.45% 408.676ms 237.602us 0.000us 0.00% 160.717ms 93.440us 0 b 0 b 49.74 Gb -8.76 Mb 1720
aten::permute 0.14% 40.151ms 0.15% 43.709ms 26.017us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1680
aten::item 0.09% 24.409ms 0.84% 237.456ms 296.820us 0.000us 0.00% 871.000us 1.089us 0 b 0 b 0 b 0 b 800
aten::expand 0.30% 84.509ms 0.32% 89.671ms 30.091us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2980
aten::expand_as 0.01% 2.079ms 0.02% 4.395ms 43.950us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 100
rnnt_encoder_layer 1.18% 333.477ms 32.04% 9.038s 37.659ms 0.000us 0.00% 4.199s 17.497ms 1.14 Kb -5.42 Kb 206.54 Gb -43.05 Gb 240
rnnt_encoder_layer_feed_forward_macaron 0.71% 201.251ms 5.95% 1.677s 6.988ms 0.000us 0.00% 1.059s 4.413ms -960 b -4.69 Kb 54.52 Gb -13.15 Gb 240
rnnt_encoder_layer_self_attn 2.40% 676.390ms 8.88% 2.505s 10.438ms 0.000us 0.00% 1.314s 5.474ms 960 b -2.81 Kb 76.69 Gb -115.34 Gb 240
aten::chunk 0.03% 8.315ms 0.31% 88.140ms 367.250us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 240
aten::split 0.09% 25.735ms 0.28% 79.825ms 332.604us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 240
aten::narrow 0.16% 45.533ms 0.33% 92.288ms 64.992us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1420
aten::masked_fill 0.12% 33.983ms 0.44% 123.354ms 513.975us 0.000us 0.00% 126.640ms 527.667us 0 b 0 b 23.07 Gb 0 b 240
aten::softmax 0.03% 9.085ms 0.08% 21.790ms 90.792us 0.000us 0.00% 130.369ms 543.204us 0 b 0 b 23.06 Gb 0 b 240
rnnt_encoder_layer_conv 1.05% 295.586ms 7.36% 2.076s 8.649ms 0.000us 0.00% 575.679ms 2.399ms -960 b -4.69 Kb 26.32 Gb -3.29 Gb 240
aten::conv1d 0.11% 29.812ms 2.02% 569.413ms 769.477us 0.000us 0.00% 382.576ms 516.995us 0 b 0 b 16.51 Gb 0 b 740
aten::squeeze 0.18% 51.811ms 0.20% 55.926ms 23.302us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2400
rnnt_encoder_layer_feed_forward 0.71% 201.388ms 4.79% 1.352s 5.631ms 0.000us 0.00% 1.035s 4.311ms -960 b -4.69 Kb 54.49 Gb -13.15 Gb 240
aten::as_strided_ 0.00% 1.100ms 0.00% 1.100ms 2.200us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 500
aten::is_nonzero 0.00% 278.000us 0.02% 5.497ms 274.850us 0.000us 0.00% 43.000us 2.150us 0 b 0 b 0 b 0 b 20
aten::constant_pad_nd 0.01% 3.765ms 0.04% 10.868ms 181.133us 0.000us 0.00% 954.000us 15.900us 0 b 0 b 136.71 Mb 0 b 60
rnnt_decoder 0.04% 10.113ms 0.14% 39.257ms 1.963ms 0.000us 0.00% 1.663ms 83.150us -80 b -400 b 136.57 Mb -155.34 Mb 20
aten::embedding 0.01% 1.851ms 0.02% 4.944ms 247.200us 0.000us 0.00% 255.000us 12.750us 0 b 0 b 67.91 Mb 0 b 20
aten::zeros_like 0.00% 1.045ms 0.02% 5.211ms 86.850us 0.000us 0.00% 248.000us 4.133us 0 b 0 b 120.35 Mb 0 b 60
aten::stack 0.00% 1.371ms 0.02% 4.798ms 239.900us 0.000us 0.00% 54.000us 2.700us 0 b 0 b 10.00 Kb 0 b 20
aten::cat 0.02% 6.960ms 0.14% 40.795ms 97.131us 0.000us 0.00% 50.086ms 119.252us 0 b 0 b 10.32 Gb 0 b 420
rnnt_loss_simple 0.26% 73.394ms 0.87% 245.872ms 12.294ms 0.000us 0.00% 38.942ms 1.947ms 880 b -9.45 Kb 982.80 Mb -1.42 Gb 20
aten::mv 0.01% 1.541ms 0.01% 3.869ms 96.725us 0.000us 0.00% 1.502ms 37.550us 0 b 0 b 606.00 Kb 0 b 40
aten::full 0.01% 3.373ms 0.03% 7.188ms 89.850us 0.000us 0.00% 298.000us 3.725us 0 b 0 b 114.68 Mb 0 b 80
aten::ones 0.01% 1.911ms 0.01% 3.702ms 92.550us 0.000us 0.00% 63.000us 1.575us 0 b 0 b 20.00 Kb 0 b 40
aten::where 0.02% 6.124ms 0.09% 25.862ms 258.620us 0.000us 0.00% 421.000us 4.210us 0 b 0 b 3.32 Mb -20.00 Kb 100
aten::scalar_tensor 0.01% 1.642ms 0.01% 3.410ms 85.250us 0.000us 0.00% 71.000us 1.775us 0 b 0 b 20.00 Kb 0 b 40
cudaPeekAtLastError 0.00% 0.000us 0.00% 0.000us 0.000us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2560
cudaDeviceGetAttribute 0.00% 62.000us 0.00% 62.000us 0.097us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 640
cudaPointerGetAttributes 0.00% 145.000us 0.00% 145.000us 3.625us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 40
aten::logsumexp 0.02% 6.105ms 0.06% 17.829ms 891.450us 0.000us 0.00% 17.058ms 852.900us 0 b 0 b 2.74 Mb -1.34 Gb 20
aten::repeat 0.04% 12.595ms 0.10% 26.821ms 335.262us 0.000us 0.00% 631.000us 7.888us 0 b 0 b 259.04 Mb 0 b 80
aten::alias 0.00% 129.000us 0.00% 129.000us 1.613us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 80
aten::unfold 0.02% 4.243ms 0.02% 4.535ms 22.675us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 200
aten::ones_like 0.01% 1.704ms 0.01% 3.310ms 165.500us 0.000us 0.00% 35.000us 1.750us 0 b 0 b 10.00 Kb 0 b 20
autograd::engine::evaluate_function: AddBackward0 0.35% 97.419ms 1.50% 422.761ms 96.675us 0.000us 0.00% 191.219ms 43.727us 0 b 0 b -3.41 Gb -6.95 Gb 4373
AddBackward0 0.01% 4.183ms 0.01% 4.183ms 0.957us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 4373
autograd::engine::evaluate_function: MulBackward0 0.82% 230.128ms 4.70% 1.325s 217.066us 0.000us 0.00% 160.464ms 26.280us -3.17 Kb -3.17 Kb 257.55 Mb -22.75 Gb 6106
MulBackward0 0.32% 89.892ms 2.25% 634.514ms 103.916us 0.000us 0.00% 109.691ms 17.964us 0 b 0 b 22.99 Gb 0 b 6106
autograd::engine::evaluate_function: NegBackward0 0.00% 585.000us 0.01% 2.552ms 63.800us 0.000us 0.00% 58.000us 1.450us 0 b 0 b 10.00 Kb -10.00 Kb 40
NegBackward0 0.00% 271.000us 0.01% 1.967ms 49.175us 0.000us 0.00% 58.000us 1.450us 0 b 0 b 20.00 Kb 0 b 40
autograd::engine::evaluate_function: SumBackward0 0.00% 377.000us 0.00% 1.272ms 31.800us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 40
SumBackward0 0.00% 341.000us 0.00% 895.000us 22.375us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 40
autograd::engine::evaluate_function: MutualInformati... 0.00% 934.000us 0.05% 14.081ms 352.025us 0.000us 0.00% 1.309ms 32.725us 0 b 0 b -20.00 Kb -20.00 Kb 40
MutualInformationRecursionFunctionBackward 0.02% 4.536ms 0.05% 13.147ms 328.675us 0.000us 0.00% 1.309ms 32.725us 0 b 0 b 0 b -242.49 Mb 40
autograd::engine::evaluate_function: ScatterBackward... 0.00% 1.247ms 0.02% 6.205ms 155.125us 0.000us 0.00% 942.000us 23.550us 0 b 0 b 740.00 Kb -119.44 Mb 40
ScatterBackward1 0.00% 385.000us 0.02% 4.958ms 123.950us 0.000us 0.00% 942.000us 23.550us 0 b 0 b 120.16 Mb 0 b 40
autograd::engine::evaluate_function: CloneBackward0 0.14% 39.604ms 0.29% 83.087ms 36.764us 0.000us 0.00% 55.029ms 24.349us 0 b 0 b -9.93 Gb -19.87 Gb 2260
CloneBackward0 0.01% 1.676ms 0.01% 1.676ms 0.742us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2260
autograd::engine::evaluate_function: PermuteBackward... 0.05% 14.352ms 0.15% 41.983ms 49.980us 0.000us 0.00% 27.894ms 33.207us 0 b 0 b -3.29 Gb -6.58 Gb 840
PermuteBackward0 0.02% 5.393ms 0.06% 15.516ms 18.471us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 840
autograd::engine::evaluate_function: GatherBackward0... 0.02% 5.433ms 0.09% 25.847ms 184.621us 0.000us 0.00% 138.007ms 985.764us 0 b 0 b 57.81 Gb -3.68 Gb 140
GatherBackward0 0.00% 1.185ms 0.07% 19.614ms 140.100us 0.000us 0.00% 132.753ms 948.236us 0 b 0 b 60.16 Gb 0 b 140
aten::gather_backward 0.01% 1.947ms 0.07% 18.429ms 131.636us 0.000us 0.00% 132.753ms 948.236us 0 b 0 b 60.16 Gb 0 b 140
autograd::engine::evaluate_function: CatBackward0 0.00% 872.000us 0.02% 4.276ms 53.450us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 80
CatBackward0 0.00% 814.000us 0.01% 3.404ms 42.550us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 80
autograd::engine::evaluate_function: SubBackward0 0.02% 5.323ms 0.11% 31.325ms 195.781us 0.000us 0.00% 4.503ms 28.144us 0 b 0 b -278.15 Mb -1.02 Gb 160
SubBackward0 0.01% 4.122ms 0.09% 25.632ms 128.160us 0.000us 0.00% 2.918ms 14.590us 0 b 0 b 543.01 Mb -533.17 Mb 200
autograd::engine::evaluate_function: SelectBackward0... 0.01% 2.567ms 0.06% 18.143ms 151.192us 0.000us 0.00% 3.738ms 31.150us 0 b 0 b 1.62 Gb -60.78 Mb 120
SelectBackward0 0.00% 855.000us 0.06% 15.576ms 129.800us 0.000us 0.00% 3.738ms 31.150us 0 b 0 b 1.68 Gb 0 b 120
aten::select_backward 0.01% 2.235ms 0.05% 14.721ms 122.675us 0.000us 0.00% 3.738ms 31.150us 0 b 0 b 1.68 Gb 0 b 120
autograd::engine::evaluate_function: SliceBackward0 0.03% 8.997ms 0.21% 58.442ms 162.339us 0.000us 0.00% 24.202ms 67.228us 0 b 0 b -128.31 Mb -5.43 Gb 360
SliceBackward0 0.01% 2.569ms 0.17% 47.106ms 130.850us 0.000us 0.00% 23.758ms 65.994us 0 b 0 b 5.18 Gb 0 b 360
aten::slice_backward 0.02% 6.597ms 0.16% 44.537ms 123.714us 0.000us 0.00% 23.758ms 65.994us 0 b 0 b 5.18 Gb 0 b 360
autograd::engine::evaluate_function: SqueezeBackward... 0.03% 7.570ms 0.08% 21.492ms 27.554us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 780
SqueezeBackward1 0.01% 4.175ms 0.05% 13.922ms 17.849us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 780
autograd::engine::evaluate_function: LogsumexpBackwa... 0.00% 1.310ms 0.02% 6.009ms 300.450us 0.000us 0.00% 18.895ms 944.750us 0 b 0 b -1.34 Gb -4.02 Gb 20
LogsumexpBackward0 0.00% 1.010ms 0.01% 3.953ms 197.650us 0.000us 0.00% 13.673ms 683.650us 0 b 0 b 1.34 Gb -2.68 Gb 20
autograd::engine::evaluate_function: UnsafeViewBackw... 0.09% 24.243ms 0.23% 65.082ms 25.623us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2540
UnsafeViewBackward0 0.05% 14.311ms 0.14% 40.839ms 16.078us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 2540
autograd::engine::evaluate_function: MmBackward0 0.26% 74.033ms 1.68% 474.201ms 263.445us 0.000us 0.00% 3.617s 2.009ms 0 b 0 b -46.99 Gb -94.95 Gb 1800
MmBackward0 0.32% 88.890ms 1.42% 400.168ms 222.316us 0.000us 0.00% 3.617s 2.009ms 0 b 0 b 47.96 Gb 0 b 1800
autograd::engine::evaluate_function: ViewBackward0 0.14% 38.848ms 0.51% 144.288ms 41.225us 0.000us 0.00% 26.077ms 7.451us 0 b 0 b -275.16 Mb -7.12 Gb 3500
ViewBackward0 0.07% 20.223ms 0.37% 104.577ms 29.879us 0.000us 0.00% 24.961ms 7.132us 0 b 0 b 6.58 Gb 0 b 3500
autograd::engine::evaluate_function: TBackward0 0.06% 17.186ms 0.19% 54.016ms 30.009us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1800
TBackward0 0.03% 8.455ms 0.13% 36.830ms 20.461us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1800
autograd::engine::evaluate_function: torch::autograd... 0.79% 221.860ms 2.31% 651.584ms 58.490us 0.000us 0.00% 34.383ms 3.086us 0 b 0 b -5.94 Gb -6.11 Gb 11140
torch::autograd::AccumulateGrad 0.32% 89.259ms 1.52% 429.724ms 38.575us 0.000us 0.00% 34.383ms 3.086us 0 b 0 b 175.80 Mb 0 b 11140
autograd::engine::evaluate_function: ExpBackward0 0.89% 251.630ms 1.99% 560.814ms 98.044us 0.000us 0.00% 8.480ms 1.483us 0 b 0 b -348.07 Mb -695.84 Mb 5720
ExpBackward0 0.18% 49.794ms 1.10% 309.184ms 54.053us 0.000us 0.00% 8.480ms 1.483us 0 b 0 b 347.78 Mb 0 b 5720
autograd::engine::evaluate_function: TanhBackward0 0.00% 553.000us 0.01% 1.773ms 88.650us 0.000us 0.00% 11.839ms 591.950us 0 b 0 b -1.37 Gb -2.74 Gb 20
TanhBackward0 0.00% 227.000us 0.00% 1.220ms 61.000us 0.000us 0.00% 11.839ms 591.950us 0 b 0 b 1.37 Gb 0 b 20
autograd::engine::evaluate_function: ExpandBackward0... 0.06% 16.079ms 0.14% 38.497ms 29.164us 0.000us 0.00% 117.627ms 89.111us 0 b 0 b -65.57 Gb -66.71 Gb 1320
ExpandBackward0 0.01% 3.496ms 0.07% 20.793ms 15.752us 0.000us 0.00% 117.360ms 88.909us 0 b 0 b 1.07 Gb 0 b 1320
autograd::engine::evaluate_function: UnsqueezeBackwa... 0.06% 17.377ms 0.21% 58.459ms 36.537us 0.000us 0.00% 1.038ms 0.649us 0 b 0 b -278.26 Mb -556.69 Mb 1600
UnsqueezeBackward0 0.08% 23.235ms 0.14% 40.250ms 25.156us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1600
autograd::engine::evaluate_function: torch::autograd... 0.01% 2.900ms 0.11% 30.888ms 386.100us 0.000us 0.00% 3.147ms 39.337us 0 b 0 b 187.31 Mb -310.97 Mb 80
torch::autograd::CopySlices 0.01% 3.821ms 0.09% 26.272ms 328.400us 0.000us 0.00% 2.779ms 34.737us 0 b 0 b 497.17 Mb -242.47 Mb 80
aten::new_empty_strided 0.00% 825.000us 0.01% 3.083ms 30.830us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 419.19 Mb 0 b 100
torch::autograd::CopyBackwards 0.00% 702.000us 0.01% 3.574ms 89.350us 0.000us 0.00% 220.000us 5.500us 0 b 0 b 120.34 Mb 0 b 40
autograd::engine::evaluate_function: AsStridedBackwa... 0.02% 4.355ms 0.17% 48.652ms 173.757us 0.000us 0.00% 193.870ms 692.393us 0 b 0 b 45.98 Gb -367.58 Mb 280
AsStridedBackward0 0.03% 8.099ms 0.15% 42.698ms 152.493us 0.000us 0.00% 193.506ms 691.093us 0 b 0 b 46.22 Gb 0 b 280
aten::new_zeros 0.01% 1.996ms 0.08% 23.305ms 83.232us 0.000us 0.00% 88.603ms 316.439us 0 b 0 b 46.22 Gb 0 b 280
autograd::engine::evaluate_function: TransposeBackwa... 0.06% 16.762ms 0.16% 46.008ms 26.141us 0.000us 0.00% 1.721ms 0.978us 0 b 0 b -278.78 Mb -556.69 Mb 1760
TransposeBackward0 0.03% 9.000ms 0.10% 28.354ms 16.110us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1760
autograd::engine::evaluate_function: LogBackward0 0.01% 1.912ms 0.02% 5.788ms 72.350us 0.000us 0.00% 445.000us 5.562us 0 b 0 b -63.38 Mb -126.55 Mb 80
LogBackward0 0.00% 620.000us 0.01% 3.876ms 48.450us 0.000us 0.00% 445.000us 5.562us 0 b 0 b 63.17 Mb 0 b 80
autograd::engine::evaluate_function: ReshapeAliasBac... 0.05% 13.326ms 0.13% 37.340ms 27.456us 0.000us 0.00% 22.000us 0.016us 0 b 0 b -40.00 Kb -80.00 Kb 1360
ReshapeAliasBackward0 0.03% 8.178ms 0.08% 23.194ms 17.054us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1360
autograd::engine::evaluate_function: MvBackward0 0.00% 664.000us 0.02% 4.515ms 225.750us 0.000us 0.00% 1.455ms 72.750us 0 b 0 b 277.79 Mb -606.00 Kb 20
MvBackward0 0.00% 396.000us 0.01% 3.851ms 192.550us 0.000us 0.00% 1.455ms 72.750us 0 b 0 b 278.38 Mb 0 b 20
aten::ger 0.00% 136.000us 0.01% 1.530ms 76.500us 0.000us 0.00% 566.000us 28.300us 0 b 0 b 278.34 Mb 0 b 20
aten::outer 0.00% 261.000us 0.00% 1.394ms 69.700us 0.000us 0.00% 566.000us 28.300us 0 b 0 b 278.34 Mb 0 b 20
autograd::engine::evaluate_function: MeanBackward1 0.02% 5.346ms 0.12% 33.886ms 121.021us 0.000us 0.00% 9.429ms 33.675us 0 b 0 b 3.62 Gb -7.22 Mb 280
MeanBackward1 0.02% 4.784ms 0.10% 28.540ms 101.929us 0.000us 0.00% 9.429ms 33.675us 0 b 0 b 3.63 Gb 0 b 280
autograd::engine::evaluate_function: DivBackward0 0.01% 1.521ms 0.03% 8.235ms 411.750us 0.000us 0.00% 1.821ms 91.050us 0 b 0 b -50.00 Kb -133.43 Mb 20
DivBackward0 0.00% 1.162ms 0.02% 4.983ms 249.150us 0.000us 0.00% 1.586ms 79.300us 0 b 0 b 133.11 Mb -201.45 Mb 20
autograd::engine::evaluate_function: SumBackward1 0.00% 717.000us 0.01% 1.988ms 99.400us 0.000us 0.00% 283.000us 14.150us 0 b 0 b -230.00 Kb -66.93 Mb 20
SumBackward1 0.00% 147.000us 0.00% 410.000us 20.500us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 20
autograd::engine::evaluate_function: BmmBackward0 0.13% 36.814ms 0.54% 151.314ms 204.478us 0.000us 0.00% 1.066s 1.440ms 0 b 0 b -49.11 Gb -92.25 Gb 740
BmmBackward0 0.06% 16.876ms 0.41% 114.500ms 154.730us 0.000us 0.00% 1.066s 1.440ms 0 b 0 b 43.13 Gb 0 b 740
autograd::engine::evaluate_function: MaxBackward0 0.01% 2.328ms 0.03% 9.330ms 233.250us 0.000us 0.00% 2.106ms 52.650us 0 b 0 b -3.07 Mb -692.74 Mb 40
MaxBackward0 0.00% 396.000us 0.02% 5.481ms 137.025us 0.000us 0.00% 819.000us 20.475us 0 b 0 b 344.62 Mb 0 b 40
aten::value_selecting_reduction_backward 0.00% 584.000us 0.02% 5.085ms 127.125us 0.000us 0.00% 819.000us 20.475us 0 b 0 b 344.62 Mb 0 b 40
autograd::engine::evaluate_function: ReluBackward0 0.00% 566.000us 0.01% 2.090ms 104.500us 0.000us 0.00% 446.000us 22.300us 0 b 0 b -67.94 Mb -135.84 Mb 20
ReluBackward0 0.00% 245.000us 0.01% 1.524ms 76.200us 0.000us 0.00% 446.000us 22.300us 0 b 0 b 67.91 Mb 0 b 20
autograd::engine::evaluate_function: ConvDepthwise2D... 0.04% 11.413ms 0.29% 80.478ms 309.531us 0.000us 0.00% 239.752ms 922.123us 0 b 0 b -3.36 Gb -6.74 Gb 260
ConvDepthwise2DBackward0 0.04% 10.812ms 0.24% 69.065ms 265.635us 0.000us 0.00% 239.752ms 922.123us 0 b 0 b 3.37 Gb -210.42 Mb 260
autograd::engine::evaluate_function: ConstantPadNdBa... 0.00% 400.000us 0.01% 2.706ms 135.300us 0.000us 0.00% 269.000us 13.450us 0 b 0 b -640.00 Kb -68.53 Mb 20
ConstantPadNdBackward0 0.00% 162.000us 0.01% 2.306ms 115.300us 0.000us 0.00% 269.000us 13.450us 0 b 0 b 67.91 Mb 0 b 20
autograd::engine::evaluate_function: EmbeddingBackwa... 0.00% 583.000us 0.02% 4.984ms 249.200us 0.000us 0.00% 1.964ms 98.200us 0 b 0 b -48.65 Mb -68.18 Mb 20
EmbeddingBackward0 0.00% 176.000us 0.02% 4.401ms 220.050us 0.000us 0.00% 1.964ms 98.200us 0 b 0 b 19.53 Mb 0 b 20
aten::embedding_backward 0.00% 193.000us 0.01% 4.225ms 211.250us 0.000us 0.00% 1.964ms 98.200us 0 b 0 b 19.53 Mb 0 b 20
autograd::engine::evaluate_function: PowBackward0 0.08% 22.781ms 0.57% 161.050ms 309.712us 0.000us 0.00% 61.549ms 118.363us 0 b 0 b -7.13 Gb -14.27 Gb 520
PowBackward0 0.07% 18.671ms 0.45% 126.436ms 243.146us 0.000us 0.00% 44.095ms 84.798us 0 b 0 b 3.58 Gb -7.15 Gb 520
autograd::engine::evaluate_function: ActivationBalan... 0.26% 73.216ms 5.39% 1.521s 1.188ms 0.000us 0.00% 1.315s 1.027ms 0 b 0 b -11.02 Gb -54.76 Gb 1280
ActivationBalancerFunctionBackward 1.53% 431.814ms 5.13% 1.447s 1.131ms 0.000us 0.00% 1.315s 1.027ms 0 b 0 b 43.75 Gb -306.28 Gb 1280
autograd::engine::evaluate_function: FusedDropoutBac... 0.12% 35.149ms 0.59% 165.437ms 113.313us 0.000us 0.00% 170.215ms 116.586us 0 b 0 b 3.00 Gb -36.72 Gb 1460
FusedDropoutBackward0 0.04% 11.877ms 0.46% 130.288ms 89.238us 0.000us 0.00% 170.215ms 116.586us 0 b 0 b 39.73 Gb 0 b 1460
autograd::engine::evaluate_function: DoubleSwishFunc... 0.11% 31.054ms 1.24% 349.307ms 447.829us 0.000us 0.00% 604.085ms 774.468us 0 b 0 b -67.22 Gb -100.82 Gb 780
DoubleSwishFunctionBackward 0.37% 103.567ms 1.13% 318.253ms 408.017us 0.000us 0.00% 604.085ms 774.468us 0 b 0 b 33.60 Gb -100.82 Gb 780
autograd::engine::evaluate_function: CudnnConvolutio... 0.07% 20.498ms 0.95% 267.515ms 495.398us 0.000us 0.00% 1.305s 2.416ms 0 b 0 b -7.69 Gb -17.91 Gb 540
CudnnConvolutionBackward0 0.03% 7.802ms 0.88% 247.017ms 457.439us 0.000us 0.00% 1.305s 2.416ms 0 b 0 b 10.21 Gb 0 b 540
aten::cudnn_convolution_backward 0.07% 19.271ms 0.85% 239.215ms 442.991us 0.000us 0.00% 1.305s 2.416ms 0 b 0 b 10.21 Gb -3.29 Gb 540
autograd::engine::evaluate_function: GluBackward0 0.03% 7.680ms 0.09% 26.453ms 110.221us 0.000us 0.00% 24.153ms 100.638us 0 b 0 b -3.29 Gb -9.87 Gb 240
GluBackward0 0.01% 2.063ms 0.07% 18.773ms 78.221us 0.000us 0.00% 24.153ms 100.638us 0 b 0 b 6.58 Gb 0 b 240
autograd::engine::evaluate_function: SoftmaxBackward... 0.03% 9.291ms 0.13% 36.459ms 151.912us 0.000us 0.00% 240.176ms 1.001ms 0 b 0 b -46.13 Gb -69.20 Gb 240
SoftmaxBackward0 0.01% 2.753ms 0.10% 27.168ms 113.200us 0.000us 0.00% 240.176ms 1.001ms 0 b 0 b 23.06 Gb 0 b 240
autograd::engine::evaluate_function: MaskedFillBackw... 0.02% 6.134ms 0.15% 41.082ms 171.175us 0.000us 0.00% 160.795ms 669.979us 0 b 0 b 6.01 Mb -23.06 Gb 240
MaskedFillBackward0 0.01% 3.272ms 0.12% 34.948ms 145.617us 0.000us 0.00% 160.795ms 669.979us 0 b 0 b 23.07 Gb 0 b 240
autograd::engine::evaluate_function: SplitBackward0 0.03% 8.709ms 0.11% 29.681ms 123.671us 0.000us 0.00% 48.181ms 200.754us 0 b 0 b 27.53 Mb -9.88 Gb 240
SplitBackward0 0.01% 1.883ms 0.07% 20.972ms 87.383us 0.000us 0.00% 48.181ms 200.754us 0 b 0 b 9.90 Gb 0 b 240
cudaBindTexture 0.00% 936.000us 0.00% 936.000us 5.200us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 180
cudaUnbindTexture 0.00% 200.000us 0.00% 200.000us 1.111us 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 180
cudaDeviceSynchronize 0.22% 61.396ms 0.22% 61.396ms 61.396ms 0.000us 0.00% 0.000us 0.000us 0 b 0 b 0 b 0 b 1
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------
Self CPU time total: 28.207s
Self CUDA time total: 14.423s