Older versions of clang choke if decode_cabac_residual_internal() is
forcibly inlined. Furthermore, inlining the function gives no
performance benefit, at least with gcc 4.4 and 4.6.
---
I ran benchmarks on my trusty old K6-III.
gcc 4.7 inlines the function with both inline and av_always_inline.
gcc 4.4 and 4.6 do not inline it when it is only marked inline, but
forcing the inlining gives no performance boost; on the contrary:
$ gcc-4.6 --version
gcc-4.6 (Debian 4.6.3-14) 4.6.3
vanilla libav:
$ nm libavcodec/h264_cabac.o
U av_log
00002000 r b_mb_type_info
00002140 r b_sub_mb_type_info
00001800 r cabac_context_init_I
00000000 r cabac_context_init_PB
00002184 r chroma422_dc_scan
0000218c r chroma_dc_scan
00002310 r coeff_abs_level1_ctx.8503
00002340 r coeff_abs_level_m1_offset.8500
00002318 r coeff_abs_level_transition.8505
00002328 r coeff_abs_levelgt1_ctx.8504
00001a10 t decode_cabac_intra_mb_type
00002d20 t decode_cabac_mb_mvd.part.2
00001d70 t decode_cabac_mb_ref
00001b90 t decode_cabac_mb_skip
00001e20 t decode_cabac_residual_dc_internal
00002470 t decode_cabac_residual_nondc_internal
00001870 t decode_significance_x86
U ff_h264_cabac_tables
U ff_h264_check_intra4x4_pred_mode
U ff_h264_check_intra_pred_mode
00002ff0 T ff_h264_decode_mb_cabac
00002f60 T ff_h264_init_cabac_states
U ff_h264_mb_sizes
U ff_h264_pred_direct_motion
U ff_init_cabac_decoder
000003d0 t fill_decode_caches
000001b0 t fill_decode_neighbors
000000f0 t get_cabac
00000030 t get_cabac_noinline
00002080 r i_mb_type_info
000022a0 r last_coeff_flag_offset.8499
000021a0 r left_block_options.8119
00000000 t mid_pred
0000205c r p_mb_type_info
00002174 r p_sub_mb_type_info
00002100 r scan8
00002220 r significant_coeff_flag_offset.8498
00002380 r significant_coeff_flag_offset_8x8.8501
00002190 r zeromv.8095
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=312.808s maxrss=7120kB
real 5m20.588s
user 5m12.968s
sys 0m0.420s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=298.707s maxrss=7120kB
real 5m6.113s
user 4m58.875s
sys 0m0.556s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=299.131s maxrss=7116kB
real 5m6.633s
user 4m59.279s
sys 0m0.532s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=298.315s maxrss=7120kB
real 5m5.746s
user 4m58.487s
sys 0m0.476s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=298.567s maxrss=7120kB
real 5m6.268s
user 4m58.727s
sys 0m0.580s
av_always_inline ---> inline:
$ nm libavcodec/h264_cabac.o
U av_log
00002000 r b_mb_type_info
00002140 r b_sub_mb_type_info
00001800 r cabac_context_init_I
00000000 r cabac_context_init_PB
00002184 r chroma422_dc_scan
0000218c r chroma_dc_scan
000023de r coeff_abs_level1_ctx.8503
00002320 r coeff_abs_level_m1_offset.8500
000023e6 r coeff_abs_level_transition.8505
000023f6 r coeff_abs_levelgt1_ctx.8504
00001870 t decode_cabac_intra_mb_type
00002b80 t decode_cabac_mb_mvd.part.2
00001bd0 t decode_cabac_mb_ref
000019f0 t decode_cabac_mb_skip
00001c80 t decode_cabac_residual_internal
U ff_h264_cabac_tables
U ff_h264_check_intra4x4_pred_mode
U ff_h264_check_intra_pred_mode
00002e50 T ff_h264_decode_mb_cabac
00002dc0 T ff_h264_init_cabac_states
U ff_h264_mb_sizes
U ff_h264_pred_direct_motion
U ff_init_cabac_decoder
000003d0 t fill_decode_caches
000001b0 t fill_decode_neighbors
000000f0 t get_cabac
00000030 t get_cabac_noinline
00002080 r i_mb_type_info
000022a0 r last_coeff_flag_offset.8499
000021a0 r left_block_options.8119
00000000 t mid_pred
0000205c r p_mb_type_info
00002174 r p_sub_mb_type_info
00002100 r scan8
00002220 r significant_coeff_flag_offset.8498
00002360 r significant_coeff_flag_offset_8x8.8501
00002190 r zeromv.8095
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=297.635s maxrss=7136kB
real 5m5.237s
user 4m57.783s
sys 0m0.628s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=297.671s maxrss=7136kB
real 5m5.299s
user 4m57.823s
sys 0m0.588s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=298.131s maxrss=7132kB
real 5m5.998s
user 4m58.291s
sys 0m0.624s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=299.767s maxrss=7136kB
real 5m7.328s
user 4m59.935s
sys 0m0.492s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=297.151s maxrss=7136kB
real 5m4.707s
user 4m57.327s
sys 0m0.552s
$ gcc-4.4 --version
gcc-4.4 (Debian 4.4.7-2) 4.4.7
vanilla libav:
$ nm libavcodec/h264_cabac.o
U av_log
00000000 r b_mb_type_info
00000140 r b_sub_mb_type_info
00000420 r cabac_context_init_I
00000c20 r cabac_context_init_PB
00000184 r chroma422_dc_scan
0000018c r chroma_dc_scan
00000240 r coeff_abs_level1_ctx.11975
000002e0 r coeff_abs_level_m1_offset.11972
00000220 r coeff_abs_level_transition.11977
00000230 r coeff_abs_levelgt1_ctx.11976
00001890 t decode_cabac_field_decoding_flag
000018f0 t decode_cabac_intra_mb_type
00001a80 t decode_cabac_mb_mvd
00003e40 t decode_cabac_residual_dc_internal
00002270 t decode_cabac_residual_nondc_internal
U ff_h264_cabac_tables
U ff_h264_check_intra4x4_pred_mode
U ff_h264_check_intra_pred_mode
000057c0 T ff_h264_decode_mb_cabac
00001800 T ff_h264_init_cabac_states
U ff_h264_mb_sizes
U ff_h264_pred_direct_motion
U ff_init_cabac_decoder
00000320 t fill_decode_caches
000000c0 t fill_decode_neighbors
00000000 t get_cabac_noinline
00000080 r i_mb_type_info
00000320 r last_coeff_flag_offset.11971
000001a0 r left_block_options.9772
0000005c r p_mb_type_info
00000174 r p_sub_mb_type_info
00000100 r scan8
000003a0 r significant_coeff_flag_offset.11970
00000260 r significant_coeff_flag_offset_8x8.11973
00000190 r zeromv.9535
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=322.728s maxrss=7288kB
real 5m30.681s
user 5m22.892s
sys 0m0.520s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=324.000s maxrss=7288kB
real 5m31.861s
user 5m24.152s
sys 0m0.508s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=322.316s maxrss=7288kB
real 5m30.225s
user 5m22.468s
sys 0m0.500s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=321.920s maxrss=7284kB
real 5m29.886s
user 5m22.076s
sys 0m0.516s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=322.108s maxrss=7288kB
real 5m30.032s
user 5m22.252s
sys 0m0.524s
av_always_inline ---> inline:
$ nm libavcodec/h264_cabac.o
U av_log
00000000 r b_mb_type_info
00000140 r b_sub_mb_type_info
00000420 r cabac_context_init_I
00000c20 r cabac_context_init_PB
00000184 r chroma422_dc_scan
0000018c r chroma_dc_scan
00000240 r coeff_abs_level1_ctx.11975
000002e0 r coeff_abs_level_m1_offset.11972
00000220 r coeff_abs_level_transition.11977
00000230 r coeff_abs_levelgt1_ctx.11976
00001890 t decode_cabac_field_decoding_flag
000018f0 t decode_cabac_intra_mb_type
000043d0 t decode_cabac_mb_mvd
00001a80 t decode_cabac_residual_internal
U ff_h264_cabac_tables
U ff_h264_check_intra4x4_pred_mode
U ff_h264_check_intra_pred_mode
00004bc0 T ff_h264_decode_mb_cabac
00001800 T ff_h264_init_cabac_states
U ff_h264_mb_sizes
U ff_h264_pred_direct_motion
U ff_init_cabac_decoder
00000320 t fill_decode_caches
000000c0 t fill_decode_neighbors
00000000 t get_cabac_noinline
00000080 r i_mb_type_info
00000320 r last_coeff_flag_offset.11971
000001a0 r left_block_options.9772
0000005c r p_mb_type_info
00000174 r p_sub_mb_type_info
00000100 r scan8
00000248 r sig_coeff_offset_dc.11974
000003a0 r significant_coeff_flag_offset.11970
00000260 r significant_coeff_flag_offset_8x8.11973
00000190 r zeromv.9535
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=320.132s maxrss=7260kB
real 5m28.130s
user 5m20.288s
sys 0m0.532s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=318.136s maxrss=7264kB
real 5m26.017s
user 5m18.292s
sys 0m0.540s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=317.760s maxrss=7260kB
real 5m25.614s
user 5m17.936s
sys 0m0.476s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=317.896s maxrss=7264kB
real 5m25.896s
user 5m18.040s
sys 0m0.564s
$ time ./avconv -benchmark -i ~/Downloads/cathedral-beta2-400extra-crop-avc.mp4
-an -f null -v 0 -
bench: utime=319.592s maxrss=7264kB
real 5m27.508s
user 5m19.756s
sys 0m0.544s
libavcodec/h264_cabac.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index c0e3356..abdddb7 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1556,7 +1556,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
return base_ctx[cat] + ctx;
}
-static av_always_inline void
+static inline void
decode_cabac_residual_internal(H264Context *h, int16_t *block,
int cat, int n, const uint8_t *scantable,
const uint32_t *qmul, int max_coeff,
--
1.7.10.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel