This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit af3f8f5bd2ed0d55cf8614064d722b533eef77e9
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Thu Feb 19 02:08:32 2026 +0100
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Sun Feb 22 01:05:12 2026 +0100

    avcodec/x86/vvc/of: Break dependency chain

    Don't extract and update one word of one and the same register
    at a time; use separate src and dst registers, so that pextrw
    and bsr can be done in parallel. Also use movd instead of pinsrw
    for the first word.

    Old benchmarks:
    apply_bdof_8_8x16_c: 3275.2 ( 1.00x)
    apply_bdof_8_8x16_avx2: 487.6 ( 6.72x)
    apply_bdof_8_16x8_c: 3243.1 ( 1.00x)
    apply_bdof_8_16x8_avx2: 284.4 (11.40x)
    apply_bdof_8_16x16_c: 6501.8 ( 1.00x)
    apply_bdof_8_16x16_avx2: 570.0 (11.41x)
    apply_bdof_10_8x16_c: 3286.5 ( 1.00x)
    apply_bdof_10_8x16_avx2: 461.7 ( 7.12x)
    apply_bdof_10_16x8_c: 3274.5 ( 1.00x)
    apply_bdof_10_16x8_avx2: 271.4 (12.06x)
    apply_bdof_10_16x16_c: 6590.0 ( 1.00x)
    apply_bdof_10_16x16_avx2: 543.9 (12.12x)
    apply_bdof_12_8x16_c: 3307.6 ( 1.00x)
    apply_bdof_12_8x16_avx2: 462.2 ( 7.16x)
    apply_bdof_12_16x8_c: 3287.4 ( 1.00x)
    apply_bdof_12_16x8_avx2: 271.8 (12.10x)
    apply_bdof_12_16x16_c: 6465.7 ( 1.00x)
    apply_bdof_12_16x16_avx2: 543.8 (11.89x)

    New benchmarks:
    apply_bdof_8_8x16_c: 3255.7 ( 1.00x)
    apply_bdof_8_8x16_avx2: 349.3 ( 9.32x)
    apply_bdof_8_16x8_c: 3262.5 ( 1.00x)
    apply_bdof_8_16x8_avx2: 214.8 (15.19x)
    apply_bdof_8_16x16_c: 6471.6 ( 1.00x)
    apply_bdof_8_16x16_avx2: 429.8 (15.06x)
    apply_bdof_10_8x16_c: 3227.7 ( 1.00x)
    apply_bdof_10_8x16_avx2: 321.6 (10.04x)
    apply_bdof_10_16x8_c: 3250.2 ( 1.00x)
    apply_bdof_10_16x8_avx2: 201.2 (16.16x)
    apply_bdof_10_16x16_c: 6476.5 ( 1.00x)
    apply_bdof_10_16x16_avx2: 400.9 (16.16x)
    apply_bdof_12_8x16_c: 3230.7 ( 1.00x)
    apply_bdof_12_8x16_avx2: 321.8 (10.04x)
    apply_bdof_12_16x8_c: 3210.5 ( 1.00x)
    apply_bdof_12_16x8_avx2: 200.9 (15.98x)
    apply_bdof_12_16x16_c: 6474.5 ( 1.00x)
    apply_bdof_12_16x16_avx2: 400.2 (16.18x)

    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vvc/of.asm | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/libavcodec/x86/vvc/of.asm b/libavcodec/x86/vvc/of.asm
index e11ada8d20..232dc1c2fd 100644
--- a/libavcodec/x86/vvc/of.asm
+++ b/libavcodec/x86/vvc/of.asm
@@ -251,21 +251,25 @@ INIT_YMM avx2
     psrlw %3, %4
 %endmacro
 
-%macro LOG2 2 ; dst/src, offset
-    pextrw tmp0d, xm%1, %2
+%macro LOG2 3 ; dst, src, offset
+    pextrw tmp0d, xm%2, %3
     bsr tmp0d, tmp0d
-    pinsrw xm%1, tmp0d, %2
+%if %3 != 0
+    pinsrw xm%1, tmp0d, %3
+%else
+    movd xm%1, tmp0d
+%endif
 %endmacro
 
-%macro LOG2 1 ; dst/src
-    LOG2 %1, 0
-    LOG2 %1, 1
-    LOG2 %1, 2
-    LOG2 %1, 3
-    LOG2 %1, 4
-    LOG2 %1, 5
-    LOG2 %1, 6
-    LOG2 %1, 7
+%macro LOG2 2 ; dst, src
+    LOG2 %1, %2, 0
+    LOG2 %1, %2, 1
+    LOG2 %1, %2, 2
+    LOG2 %1, %2, 3
+    LOG2 %1, %2, 4
+    LOG2 %1, %2, 5
+    LOG2 %1, %2, 6
+    LOG2 %1, %2, 7
 %endmacro
 
 ; %1: 4 (sgx2, sgy2, sgxdi, gydi)
@@ -277,8 +281,7 @@ INIT_YMM avx2
     punpcklqdq m8, m%1, m7 ; 4 (sgx2, sgy2)
     punpckhqdq m9, m%1, m7 ; 4 (sgxdi, sgydi)
 
-    mova m10, m8
-    LOG2 10 ; 4 (log2(sgx2), log2(sgy2))
+    LOG2 10, 8 ; 4 (log2(sgx2), log2(sgy2))
 
     ; Promote to dword since vpsrlvw is AVX-512 only
     pmovsxwd m8, xm8
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
