Skip to content

Commit aa9705f

Browse files
authored
Update profiler_recipe.py
1 parent 906944c commit aa9705f

File tree

1 file changed

+35
-33
lines changed

1 file changed

+35
-33
lines changed

recipes_source/recipes/profiler_recipe.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,24 @@
105105

106106
######################################################################
107107
# The output will look like (omitting some columns):
108-
109-
# --------------------------------- ------------ ------------ ------------ ------------
110-
# Name Self CPU CPU total CPU time avg # of Calls
111-
# --------------------------------- ------------ ------------ ------------ ------------
112-
# model_inference 5.509ms 57.503ms 57.503ms 1
113-
# aten::conv2d 231.000us 31.931ms 1.597ms 20
114-
# aten::convolution 250.000us 31.700ms 1.585ms 20
115-
# aten::_convolution 336.000us 31.450ms 1.573ms 20
116-
# aten::mkldnn_convolution 30.838ms 31.114ms 1.556ms 20
117-
# aten::batch_norm 211.000us 14.693ms 734.650us 20
118-
# aten::_batch_norm_impl_index 319.000us 14.482ms 724.100us 20
119-
# aten::native_batch_norm 9.229ms 14.109ms 705.450us 20
120-
# aten::mean 332.000us 2.631ms 125.286us 21
121-
# aten::select 1.668ms 2.292ms 8.988us 255
122-
# --------------------------------- ------------ ------------ ------------ ------------
123-
# Self CPU time total: 57.549ms
108+
#
109+
# .. code-block:: sh
110+
#
111+
# --------------------------------- ------------ ------------ ------------ ------------
112+
# Name Self CPU CPU total CPU time avg # of Calls
113+
# --------------------------------- ------------ ------------ ------------ ------------
114+
# model_inference 5.509ms 57.503ms 57.503ms 1
115+
# aten::conv2d 231.000us 31.931ms 1.597ms 20
116+
# aten::convolution 250.000us 31.700ms 1.585ms 20
117+
# aten::_convolution 336.000us 31.450ms 1.573ms 20
118+
# aten::mkldnn_convolution 30.838ms 31.114ms 1.556ms 20
119+
# aten::batch_norm 211.000us 14.693ms 734.650us 20
120+
# aten::_batch_norm_impl_index 319.000us 14.482ms 724.100us 20
121+
# aten::native_batch_norm 9.229ms 14.109ms 705.450us 20
122+
# aten::mean 332.000us 2.631ms 125.286us 21
123+
# aten::select 1.668ms 2.292ms 8.988us 255
124+
# --------------------------------- ------------ ------------ ------------ ------------
125+
# Self CPU time total: 57.549ms
124126
#
125127

126128
######################################################################
@@ -218,26 +220,26 @@
218220
#
219221
# .. code-block:: sh
220222
#
221-
# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------
222-
# Name Self XPU Self XPU % XPU total XPU time avg # of Calls
223-
# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------
224-
# model_inference 0.000us 0.00% 2.567ms 2.567ms 1
225-
# aten::conv2d 0.000us 0.00% 1.871ms 93.560us 20
226-
# aten::convolution 0.000us 0.00% 1.871ms 93.560us 20
227-
# aten::_convolution 0.000us 0.00% 1.871ms 93.560us 20
228-
# aten::convolution_overrideable 1.871ms 72.89% 1.871ms 93.560us 20
229-
# gen_conv 1.484ms 57.82% 1.484ms 74.216us 20
230-
# aten::batch_norm 0.000us 0.00% 432.640us 21.632us 20
231-
# aten::_batch_norm_impl_index 0.000us 0.00% 432.640us 21.632us 20
232-
# aten::native_batch_norm 432.640us 16.85% 432.640us 21.632us 20
233-
# conv_reorder 386.880us 15.07% 386.880us 6.448us 60
234-
# ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------
235-
# Self CPU time total: 712.486ms
236-
# Self XPU time total: 2.567ms
223+
# ------------------------------ ------------ ------------ ------------ ------------ ------------
224+
# Name Self XPU Self XPU % XPU total XPU time avg # of Calls
225+
# ------------------------------ ------------ ------------ ------------ ------------ ------------
226+
# model_inference 0.000us 0.00% 2.567ms 2.567ms 1
227+
# aten::conv2d 0.000us 0.00% 1.871ms 93.560us 20
228+
# aten::convolution 0.000us 0.00% 1.871ms 93.560us 20
229+
# aten::_convolution 0.000us 0.00% 1.871ms 93.560us 20
230+
# aten::convolution_overrideable 1.871ms 72.89% 1.871ms 93.560us 20
231+
# gen_conv 1.484ms 57.82% 1.484ms 74.216us 20
232+
# aten::batch_norm 0.000us 0.00% 432.640us 21.632us 20
233+
# aten::_batch_norm_impl_index 0.000us 0.00% 432.640us 21.632us 20
234+
# aten::native_batch_norm 432.640us 16.85% 432.640us 21.632us 20
235+
# conv_reorder 386.880us 15.07% 386.880us 6.448us 60
236+
# ------------------------------ ------------ ------------ ------------ ------------ ------------
237+
# Self CPU time total: 712.486ms
238+
# Self XPU time total: 2.567ms
237239
#
238240

239241
######################################################################
240-
# Note the occurrence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN``).
242+
# Note the occurrence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN`` for CUDA or ``gen_conv`` for XPU).
241243

242244
######################################################################
243245
# 4. Using profiler to analyze memory consumption

0 commit comments

Comments
 (0)