h264_deblock: Various improvements (PR #21535)

mkver via ffmpeg-devel Tue, 20 Jan 2026 17:03:08 -0800

PR #21535 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21535
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21535.patch


Also remove "h264" from H264DSPContext member names.


>From 9c68f3f25e3348e1f56daf1261c9a523fbabf6ed Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 6 Jan 2026 15:00:30 +0100
Subject: [PATCH 01/13] avcodec/x86/h264_deblock: Remove unused macros

Forgotten in 4618f36a2424a3a4d5760afabc2e9dd18d73f0a4.
Also remove a PASS8ROWS wrapper that seems to have always
been unused.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 36 ---------------------------------
 1 file changed, 36 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 479e6c3460..98a0867102 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -37,38 +37,6 @@ cextern pb_0
 cextern pb_1
 cextern pb_3
 
-%define PASS8ROWS(base, base3, stride, stride3, offset) \
-    PASS8ROWS(base+offset, base3+offset, stride, stride3)
-
-; in: 8 rows of 4 bytes in %4..%11
-; out: 4 rows of 8 bytes in m0..m3
-%macro TRANSPOSE4x8_LOAD 11
-    movh       m0, %4
-    movh       m2, %5
-    movh       m1, %6
-    movh       m3, %7
-    punpckl%1  m0, m2
-    punpckl%1  m1, m3
-    mova       m2, m0
-    punpckl%2  m0, m1
-    punpckh%2  m2, m1
-
-    movh       m4, %8
-    movh       m6, %9
-    movh       m5, %10
-    movh       m7, %11
-    punpckl%1  m4, m6
-    punpckl%1  m5, m7
-    mova       m6, m4
-    punpckl%2  m4, m5
-    punpckh%2  m6, m5
-
-    punpckh%3  m1, m0, m4
-    punpckh%3  m3, m2, m6
-    punpckl%3  m0, m4
-    punpckl%3  m2, m6
-%endmacro
-
 ; in: 4 rows of 8 bytes in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
@@ -100,10 +68,6 @@ cextern pb_3
     movh       %8, m4
 %endmacro
 
-%macro TRANSPOSE4x8B_LOAD 8
-    TRANSPOSE4x8_LOAD bw, wd, dq, %1, %2, %3, %4, %5, %6, %7, %8
-%endmacro
-
 %macro SBUTTERFLY3 4
     punpckh%1  %4, %2, %3
     punpckl%1  %2, %3
-- 
2.52.0


>From 2afa66816c6e8bf380fbb6883fc911af6cd6eec7 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 09:00:47 +0100
Subject: [PATCH 02/13] tests/checkasm/h264dsp: Don't test loop filter strength
 0

These functions are not used with these parameters;
see the filter_mb_* functions in h264_loopfilter.c.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 tests/checkasm/h264dsp.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index acf4f61ebb..0bf01e072e 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -374,24 +374,26 @@ static void check_idct_dequant(void)
 
 static void check_loop_filter(void)
 {
+    enum {
+        N = 35,
+    };
     LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
     LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
     LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
     H264DSPContext h;
     int bit_depth;
-    int alphas[36], betas[36];
-    int8_t tc0[36][4];
+    int alphas[N], betas[N];
+    int8_t tc0[N][4];
 
     declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
                       int alpha, int beta, int8_t *tc0);
 
     for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
-        int i, j, a, c;
         uint32_t mask = pixel_mask_lf[bit_depth - 8];
         ff_h264dsp_init(&h, bit_depth, 1);
-        for (i = 35, a = 255, c = 250; i >= 0; i--) {
+        for (int i = N, a = 255, c = 250; --i >= 0;) {
             alphas[i] = a << (bit_depth - 8);
-            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            betas[i]  = (i + 2) / 2 << (bit_depth - 8);
             tc0[i][0] = tc0[i][3] = (c + 6) / 10;
             tc0[i][1] = (c + 7) / 15;
             tc0[i][2] = (c + 9) / 20;
@@ -402,9 +404,9 @@ static void check_loop_filter(void)
 #define CHECK_LOOP_FILTER(name, align, idc)                             \
         do {                                                            \
             if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
-                for (j = 0; j < 36; j++) {                              \
+                for (int j = 0; j < N; ++j) {                           \
                     intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
-                    for (i = 0; i < 1024; i+=4) {                       \
+                    for (int i = 0; i < 1024; i += 4) {                 \
                         AV_WN32A(dst + i, rnd() & mask);                \
                     }                                                   \
                     memcpy(dst0, dst, 32 * 16 * 2);                     \
@@ -439,32 +441,34 @@ static void check_loop_filter(void)
 
 static void check_loop_filter_intra(void)
 {
+    enum {
+        N = 35,
+    };
     LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
     LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
     LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
     H264DSPContext h;
     int bit_depth;
-    int alphas[36], betas[36];
+    int alphas[N], betas[N];
 
     declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
                       int alpha, int beta);
 
     for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
-        int i, j, a;
         uint32_t mask = pixel_mask_lf[bit_depth - 8];
         ff_h264dsp_init(&h, bit_depth, 1);
-        for (i = 35, a = 255; i >= 0; i--) {
+        for (int i = N, a = 255; --i >= 0;) {
             alphas[i] = a << (bit_depth - 8);
-            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            betas[i]  = (i + 2) / 2 << (bit_depth - 8);
             a = a*9/10;
         }
 
 #define CHECK_LOOP_FILTER(name, align, idc)                             \
         do {                                                            \
             if (check_func(h.name, #name #idc "_%dbpp", bit_depth)) {   \
-                for (j = 0; j < 36; j++) {                              \
+                for (int j = 0; j < N; ++j) {                           \
                     intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
-                    for (i = 0; i < 1024; i+=4) {                       \
+                    for (int i = 0; i < 1024; i += 4) {                 \
                         AV_WN32A(dst + i, rnd() & mask);                \
                     }                                                   \
                     memcpy(dst0, dst, 32 * 16 * 2);                     \
-- 
2.52.0


>From fdc4aa940af0857e4d8ca53bf7d9f5174f7ba31b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 09:05:04 +0100
Subject: [PATCH 03/13] avcodec/x86/h264_deblock: Remove always-false branches

These functions are always called with alpha and beta > 0.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 98a0867102..075a6325e1 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -699,10 +699,8 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
     dec     r2d        ; alpha-1
-    jl .end
     neg     r4
     dec     r3d        ; beta-1
-    jl .end
     add     r4, r0     ; pix-4*stride
     mova    p1, [r4+2*r1]
     mova    p0, [r4+r5]
@@ -743,7 +741,6 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
     LUMA_INTRA_P012 [r4+r5], [r4+2*r1], [r4+r1], [r4]
     LUMA_INTRA_SWAP_PQ
     LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
-.end:
     RET
 
 INIT_MMX cpuname
-- 
2.52.0


>From 56a6f73a2c6887f977e6528fd2650b15475c1a29 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 11:09:40 +0100
Subject: [PATCH 04/13] avcodec/x86/h264_deblock: Simplify splatting

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 075a6325e1..d5610691d2 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -182,10 +182,16 @@ cextern pb_3
 %macro LOAD_MASK 2-3
     movd     m4, %1
     movd     m5, %2
+%if cpuflag(ssse3)
+    pxor     m6, m6
+    pshufb   m4, m6
+    pshufb   m5, m6
+%else
     SPLATW   m4, m4
     SPLATW   m5, m5
     packuswb m4, m4  ; 16x alpha-1
     packuswb m5, m5  ; 16x beta-1
+%endif
 %if %0>2
     mova     %3, m4
 %endif
-- 
2.52.0


>From 3175f4d1c5eeab6b8c35ec36cef2927caa36a6a0 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 11:35:00 +0100
Subject: [PATCH 05/13] avcodec/x86/h264_deblock: Remove obsolete macro
 parameters

They are a remnant of the MMX functions, which have been removed
in commit 4618f36a2424a3a4d5760afabc2e9dd18d73f0a4. Each MMX function
processed only eight pixels at a time, so it was called twice
via a wrapper; the actual MMX function had "v8" in its name
instead of simply "v".

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 35 ++++++++++++---------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d5610691d2..3a343a2afb 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -445,12 +445,12 @@ DEBLOCK_LUMA
 
 %else
 
-%macro DEBLOCK_LUMA 2
+%macro DEBLOCK_LUMA 1
 ;-----------------------------------------------------------------------------
-; void ff_deblock_v8_luma(uint8_t *pix, int stride, int alpha, int beta,
-;                         int8_t *tc0)
+; void ff_deblock_v_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
+;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_8, 5,5,8,2*%2
+cglobal deblock_v_luma_8, 5,5,8,2*%1
     lea     r4, [r1*3]
     dec     r2     ; alpha-1
     neg     r4
@@ -468,7 +468,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
     movd    m4, [r3] ; tc0
     punpcklbw m4, m4
     punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
-    mova   [esp+%2], m4 ; tc
+    mova   [esp+%1], m4 ; tc
     pcmpgtb m4, m3
     mova    m3, [r4] ; p2
     pand    m4, m7
@@ -476,7 +476,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
 
     DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
     pand    m6, m4
-    pand    m4, [esp+%2] ; tc
+    pand    m4, [esp+%1] ; tc
     psubb   m7, m4, m6
     pand    m6, m4
     LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@@ -484,7 +484,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
     mova    m4, [r0+2*r1] ; q2
     DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
     pand    m6, [esp] ; mask
-    mova    m5, [esp+%2] ; tc
+    mova    m5, [esp+%1] ; tc
     psubb   m7, m6
     pand    m5, m6
     mova    m3, [r0+r1]
@@ -521,12 +521,7 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
     PUSH   dword r2m
     PUSH   dword 16
     PUSH   dword r0
-    call   deblock_%1_luma_8
-%ifidn %1, v8
-    add    dword [esp   ], 8 ; pix_tmp+0x38
-    add    dword [esp+16], 2 ; tc0+2
-    call   deblock_%1_luma_8
-%endif
+    call   deblock_v_luma_8
     ADD    esp, 20
 
     ; transpose 16x4 -> original space  (only the middle 4 rows were changed by the filter)
@@ -552,10 +547,10 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
 %endmacro ; DEBLOCK_LUMA
 
 INIT_XMM sse2
-DEBLOCK_LUMA v, 16
+DEBLOCK_LUMA 16
 %if HAVE_AVX_EXTERNAL
 INIT_XMM avx
-DEBLOCK_LUMA v, 16
+DEBLOCK_LUMA 16
 %endif
 
 %endif ; ARCH
@@ -698,9 +693,9 @@ DEBLOCK_LUMA v, 16
 ; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
 ;-----------------------------------------------------------------------------
 %if WIN64
-cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
+cglobal deblock_v_luma_intra_8, 4,6,16,0x10
 %else
-cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
+cglobal deblock_v_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
 %endif
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
@@ -802,11 +797,7 @@ cglobal deblock_h_luma_intra_8, 2,4,8,0x80
     PUSH   dword r2m
     PUSH   dword 16
     PUSH   r0
-    call   deblock_%1_luma_intra_8
-%ifidn %1, v8
-    add    dword [rsp], 8 ; pix_tmp+8
-    call   deblock_%1_luma_intra_8
-%endif
+    call   deblock_v_luma_intra_8
     ADD    esp, 16
 
     mov    r1,  r1m
-- 
2.52.0


>From 888d08cc19f5f0ebb2d6c91dff7e4669a0c26586 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 12:46:22 +0100
Subject: [PATCH 06/13] avcodec/x86/h264_deblock_10bit: Remove custom stack
 allocation code

Allocate it via cglobal as usual. This makes the SSE2/AVX functions
available when HAVE_ALIGNED_STACK is false; it also avoids
modifying rsp unnecessarily in the deblock_h_luma_intra_10 functions
on Win64.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock_10bit.asm | 23 +++++------------------
 libavcodec/x86/h264dsp_init.c         |  4 ----
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 033f2f4d55..1ea5ce4b28 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -153,14 +153,12 @@ cextern pw_1023
 ; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta,
 ;                           int8_t *tc0)
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
-    %assign pad 5*mmsize+12-(stack_offset&15)
+cglobal deblock_v_luma_10, 5,5,8,-5*mmsize
     %define tcm [rsp]
     %define ms1 [rsp+mmsize]
     %define ms2 [rsp+mmsize*2]
     %define am  [rsp+mmsize*3]
     %define bm  [rsp+mmsize*4]
-    SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
     LOAD_AB     m4, m5, r2d, r3d
@@ -205,11 +203,9 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
     add         r4, mmsize/8
     dec         r3
     jg .loop
-    ADD         rsp, pad
     RET
 
-cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
-    %assign pad 7*mmsize+12-(stack_offset&15)
+cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
     %define tcm [rsp]
     %define ms1 [rsp+mmsize]
     %define ms2 [rsp+mmsize*2]
@@ -217,7 +213,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
     %define p2m [rsp+mmsize*4]
     %define am  [rsp+mmsize*5]
     %define bm  [rsp+mmsize*6]
-    SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
     LOAD_AB     m4, m5, r2d, r3d
@@ -295,7 +290,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
     lea         r2, [r2+r1*(mmsize/2)]
     dec         r5
     jg .loop
-    ADD        rsp, pad
     RET
 %endmacro
 
@@ -482,7 +476,6 @@ DEBLOCK_LUMA_64
 %endmacro
 
 %macro LUMA_INTRA_INIT 1
-    %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15)
     %define t0 m4
     %define t1 m5
     %define t2 m6
@@ -492,7 +485,6 @@ DEBLOCK_LUMA_64
     CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
     %assign i i+1
 %endrep
-    SUB    rsp, pad
 %endmacro
 
 ; in: %1-%3=tmp, %4=p2, %5=q2
@@ -654,7 +646,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
 ;                                 int beta)
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10, 4,7,16
+cglobal deblock_h_luma_intra_10, 4,7,16,mmsize
     %define t0 m15
     %define t1 m14
     %define t2 m2
@@ -667,8 +659,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16
     %define p2 m13
     %define p3 m4
     %define spill [rsp]
-    %assign pad 24-(stack_offset&15)
-    SUB     rsp, pad
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
     add     r4, r0     ; pix+4*stride
@@ -709,7 +699,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16
     lea     r4, [r4+r1*8]
     dec     r6
     jg .loop
-    ADD    rsp, pad
     RET
 %endmacro
 
@@ -727,7 +716,7 @@ DEBLOCK_LUMA_INTRA_64
 ; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
 ;                                 int beta)
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
+cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
     LUMA_INTRA_INIT 3
     lea     r4, [r1*4]
     lea     r5, [r1*3]
@@ -749,14 +738,13 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
     add     r4, mmsize
     dec     r6
     jg .loop
-    ADD    rsp, pad
     RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
 ;                                 int beta)
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
+cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
     LUMA_INTRA_INIT 8
 %if mmsize == 8
     lea     r4, [r1*3]
@@ -793,7 +781,6 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
     dec     r6
 %endif
     jg .loop
-    ADD    rsp, pad
     RET
 %endmacro
 
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 66c2f36908..a62de09577 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -314,12 +314,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             } else {
                 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
             }
-#if HAVE_ALIGNED_STACK
             c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
             c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
             c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
             c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
-#endif /* HAVE_ALIGNED_STACK */
         }
         if (EXTERNAL_SSE4(cpu_flags)) {
             c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
@@ -354,12 +352,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             } else {
                 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
             }
-#if HAVE_ALIGNED_STACK
             c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
             c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
             c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
             c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
-#endif /* HAVE_ALIGNED_STACK */
         }
     }
 }
-- 
2.52.0


>From dfa0bbd5e15a13cd96a46db01d427bd20fa12227 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 13:07:55 +0100
Subject: [PATCH 07/13] avcodec/x86/h264_deblock_10bit: Remove mmxext functions

Now that the SSE2/AVX functions are no longer restricted
to those systems having an aligned stack, the MMXEXT functions
are always overridden (except for ancient systems without
SSE2), so remove them.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock_10bit.asm | 103 ++------------------------
 libavcodec/x86/h264dsp_init.c         |  11 ---
 2 files changed, 7 insertions(+), 107 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 1ea5ce4b28..ca5d9ff3b7 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -1,5 +1,5 @@
 ;*****************************************************************************
-;* MMX/SSE2/AVX-optimized 10-bit H.264 deblocking code
+;* SSE2/AVX-optimized 10-bit H.264 deblocking code
 ;*****************************************************************************
 ;* Copyright (C) 2005-2011 x264 project
 ;*
@@ -65,12 +65,8 @@ cextern pw_1023
 %macro LOAD_TC 2
     movd        %1, [%2]
     punpcklbw   %1, %1
-%if mmsize == 8
-    pshufw      %1, %1, 0
-%else
     pshuflw     %1, %1, 01010000b
     pshufd      %1, %1, 01010000b
-%endif
     psraw       %1, 6
 %endmacro
 
@@ -131,12 +127,6 @@ cextern pw_1023
 %endmacro
 
 %macro LUMA_H_STORE 2
-%if mmsize == 8
-    movq        [r0-4], m0
-    movq        [r0+r1-4], m1
-    movq        [r0+r1*2-4], m2
-    movq        [r0+%2-4], m3
-%else
     movq        [r0-4], m0
     movhps      [r0+r1-4], m0
     movq        [r0+r1*2-4], m1
@@ -145,7 +135,6 @@ cextern pw_1023
     movhps      [%1+r1*2-4], m2
     movq        [%1+%2-4], m3
     movhps      [%1+r1*4-4], m3
-%endif
 %endmacro
 
 %macro DEBLOCK_LUMA 0
@@ -222,24 +211,9 @@ cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
     mov         r5, 32/mmsize
     mova        bm, m5
     add         r3, r1
-%if mmsize == 16
     mov         r2, r0
     add         r2, r3
-%endif
 .loop:
-%if mmsize == 8
-    movq        m2, [r0-8]     ; y q2 q1 q0
-    movq        m7, [r0+0]
-    movq        m5, [r0+r1-8]
-    movq        m3, [r0+r1+0]
-    movq        m0, [r0+r1*2-8]
-    movq        m6, [r0+r1*2+0]
-    movq        m1, [r0+r3-8]
-    TRANSPOSE4x4W 2, 5, 0, 1, 4
-    SWAP         2, 7
-    movq        m7, [r0+r3]
-    TRANSPOSE4x4W 2, 3, 6, 7, 4
-%else
     movu        m5, [r0-8]     ; y q2 q1 q0 p0 p1 p2 x
     movu        m0, [r0+r1-8]
     movu        m2, [r0+r1*2-8]
@@ -258,7 +232,6 @@ cglobal deblock_h_luma_10, 5,6,8,-7*mmsize
     punpckhqdq  m5, m4
     SBUTTERFLY qdq, 0, 1, 7
     SBUTTERFLY qdq, 2, 3, 7
-%endif
 
     mova       p2m, m6
     LOAD_MASK   m0, m1, m2, m3, am, bm, m7, m4, m6
@@ -515,23 +488,6 @@ DEBLOCK_LUMA_64
 %endmacro
 
 %macro LUMA_H_INTRA_LOAD 0
-%if mmsize == 8
-    movu    t0, [r0-8]
-    movu    t1, [r0+r1-8]
-    movu    m0, [r0+r1*2-8]
-    movu    m1, [r0+r4-8]
-    TRANSPOSE4x4W 4, 5, 0, 1, 2
-    mova    t4, t0        ; p3
-    mova    t5, t1        ; p2
-
-    movu    m2, [r0]
-    movu    m3, [r0+r1]
-    movu    t0, [r0+r1*2]
-    movu    t1, [r0+r4]
-    TRANSPOSE4x4W 2, 3, 4, 5, 6
-    mova    t6, t0        ; q2
-    mova    t7, t1        ; q3
-%else
     movu    t0, [r0-8]
     movu    t1, [r0+r1-8]
     movu    m0, [r0+r1*2-8]
@@ -545,24 +501,10 @@ DEBLOCK_LUMA_64
     mova    t5, t1        ; p2
     mova    t6, t2        ; q2
     mova    t7, t3        ; q3
-%endif
 %endmacro
 
 ; in: %1=q3 %2=q2' %3=q1' %4=q0' %5=p0' %6=p1' %7=p2' %8=p3 %9=tmp
 %macro LUMA_H_INTRA_STORE 9
-%if mmsize == 8
-    TRANSPOSE4x4W %1, %2, %3, %4, %9
-    movq       [r0-8], m%1
-    movq       [r0+r1-8], m%2
-    movq       [r0+r1*2-8], m%3
-    movq       [r0+r4-8], m%4
-    movq       m%1, %8
-    TRANSPOSE4x4W %5, %6, %7, %1, %9
-    movq       [r0], m%5
-    movq       [r0+r1], m%6
-    movq       [r0+r1*2], m%7
-    movq       [r0+r4], m%1
-%else
     TRANSPOSE2x4x4W %1, %2, %3, %4, %9
     movq       [r0-8], m%1
     movq       [r0+r1-8], m%2
@@ -586,7 +528,6 @@ DEBLOCK_LUMA_64
     movhps     [r4+r1], m%6
     movhps     [r4+r1*2], m%7
     movhps     [r4+r5], m%1
-%endif
 %endmacro
 
 %if ARCH_X86_64
@@ -746,15 +687,10 @@ cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
     LUMA_INTRA_INIT 8
-%if mmsize == 8
-    lea     r4, [r1*3]
-    mov     r5, 32/mmsize
-%else
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
     add     r4, r0     ; pix+4*stride
     mov     r6, 32/mmsize
-%endif
     shl    r2d, 2
     shl    r3d, 2
 .loop:
@@ -774,22 +710,13 @@ cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
     LUMA_H_INTRA_STORE 2, 0, 1, 3, 4, 6, 5, t7, 7
 
     lea     r0, [r0+r1*(mmsize/2)]
-%if mmsize == 8
-    dec     r5
-%else
     lea     r4, [r4+r1*(mmsize/2)]
     dec     r6
-%endif
     jg .loop
     RET
 %endmacro
 
 %if ARCH_X86_64 == 0
-%if HAVE_ALIGNED_STACK == 0
-INIT_MMX mmxext
-DEBLOCK_LUMA
-DEBLOCK_LUMA_INTRA
-%endif
 INIT_XMM sse2
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
@@ -876,37 +803,21 @@ DEBLOCK_LUMA_INTRA
 %endmacro
 
 ; %1 = base + 3*stride
-; %2 = 3*stride (unused on mmx)
+; %2 = 3*stride
 ; %3, %4 = place to store p1 and q1 values
 %macro CHROMA_H_LOAD 4
-    %if mmsize == 8
-        movq m0, [pix_q - 4]
-        movq m1, [pix_q +   stride_q - 4]
-        movq m2, [pix_q + 2*stride_q - 4]
-        movq m3, [%1 - 4]
-        TRANSPOSE4x4W 0, 1, 2, 3, 4
-    %else
-        TRANSPOSE4x8W_LOAD PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
-    %endif
+    TRANSPOSE4x8W_LOAD PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
     mova %3, m0
     mova %4, m3
 %endmacro
 
 ; %1 = base + 3*stride
-; %2 = 3*stride (unused on mmx)
+; %2 = 3*stride
 ; %3, %4 = place to load p1 and q1 values
 %macro CHROMA_H_STORE 4
     mova m0, %3
     mova m3, %4
-    %if mmsize == 8
-        TRANSPOSE4x4W 0, 1, 2, 3, 4
-        movq [pix_q - 4],              m0
-        movq [pix_q +   stride_q - 4], m1
-        movq [pix_q + 2*stride_q - 4], m2
-        movq [%1 - 4],                 m3
-    %else
-        TRANSPOSE8x4W_STORE PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
-    %endif
+    TRANSPOSE8x4W_STORE PASS8ROWS(pix_q-4, %1-4, stride_q, %2)
 %endmacro
 
 %macro CHROMA_V_LOAD_TC 2
@@ -921,7 +832,7 @@ DEBLOCK_LUMA_INTRA
 ; void ff_deblock_v_chroma_10(uint16_t *pix, int stride, int alpha, int beta,
 ;                             int8_t *tc0)
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_10, 5,6,8
     mov         r5, r0
     sub         r0, r1
     sub         r0, r1
@@ -943,7 +854,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 ; void ff_deblock_v_chroma_intra_10(uint16_t *pix, int stride, int alpha,
 ;                                   int beta)
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_intra_10, 4,5,8
     mov         r4, r0
     sub         r0, r1
     sub         r0, r1
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index a62de09577..1ee1ee4367 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -127,11 +127,6 @@ LF_FUNC(h, luma_mbaff, 8, avx)
 LF_FUNCS(uint8_t,   8)
 LF_FUNCS(uint16_t, 10)
 
-LF_FUNC(v,  luma,       10, mmxext)
-LF_FUNC(h,  luma,       10, mmxext)
-LF_IFUNC(v, luma_intra, 10, mmxext)
-LF_IFUNC(h, luma_intra, 10, mmxext)
-
 /***********************************/
 /* weighted prediction */
 
@@ -275,12 +270,6 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
-            c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
-            c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
-            c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
-            c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmxext;
-#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
             c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
-- 
2.52.0


>From e54aa2d15536fbe58d9a170871166fb6f7130fd1 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 13:32:29 +0100
Subject: [PATCH 08/13] avcodec/x86/h264_deblock_10bit: Simplify r0+4*r1

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock_10bit.asm | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index ca5d9ff3b7..7b95754c89 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -600,9 +600,8 @@ cglobal deblock_h_luma_intra_10, 4,7,16,mmsize
     %define p2 m13
     %define p3 m4
     %define spill [rsp]
-    lea     r4, [r1*4]
+    lea     r4, [r0+r1*4] ; pix+4*stride
     lea     r5, [r1*3] ; 3*stride
-    add     r4, r0     ; pix+4*stride
     mov     r6, 2
     mova    m0, [pw_2]
     shl    r2d, 2
@@ -687,9 +686,8 @@ cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize
     LUMA_INTRA_INIT 8
-    lea     r4, [r1*4]
+    lea     r4, [r0+r1*4]
     lea     r5, [r1*3] ; 3*stride
-    add     r4, r0     ; pix+4*stride
     mov     r6, 32/mmsize
     shl    r2d, 2
     shl    r3d, 2
-- 
2.52.0


>From 494821be5d9f93a7b553fe9047139c0da05da589 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 17:15:08 +0100
Subject: [PATCH 09/13] avcodec/x86/h264_deblock: Avoid reloading constant

No change in benchmarks.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 3a343a2afb..94d0771384 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -219,14 +219,14 @@ cextern pb_3
     pavgb   m3, m5
     mova    m6, [pb_A1]
     paddusb m3, m4       ; d+128+33
-    psubusb m6, m3
-    psubusb m3, [pb_A1]
-    pminub  m6, m7
+    psubusb m4, m6, m3
+    psubusb m3, m6
+    pminub  m4, m7
     pminub  m3, m7
-    psubusb m1, m6
+    psubusb m1, m4
     psubusb m2, m3
     paddusb m1, m3
-    paddusb m2, m6
+    paddusb m2, m4
 %endmacro
 
 ; in: m1=p0 m2=q0
-- 
2.52.0


>From 707738d00097b9b9f27dd2cad9bc41459e6423fe Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 15:11:06 +0100
Subject: [PATCH 10/13] avcodec/x86/h264_deblock: Avoid MMX in deblock_h_luma_8

Old benchmarks:
h264_h_loop_filter_luma_8bpp_c:                         59.9 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2:                      67.9 ( 0.88x)
h264_h_loop_filter_luma_8bpp_avx:                       67.4 ( 0.89x)

New benchmarks:
h264_h_loop_filter_luma_8bpp_c:                         60.0 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2:                      65.4 ( 0.92x)
h264_h_loop_filter_luma_8bpp_avx:                       65.3 ( 0.92x)

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 107 ++++++++++++++++----------------
 1 file changed, 53 insertions(+), 54 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 94d0771384..c81fb36494 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -37,35 +37,25 @@ cextern pb_0
 cextern pb_1
 cextern pb_3
 
-; in: 4 rows of 8 bytes in m0..m3
-; out: 8 rows of 4 bytes in %1..%8
-%macro TRANSPOSE8x4B_STORE 8
-    punpckhdq  m4, m0, m0
-    punpckhdq  m5, m1, m1
-    punpckhdq  m6, m2, m2
-
-    punpcklbw  m0, m1
-    punpcklbw  m2, m3
-    punpcklwd  m1, m0, m2
-    punpckhwd  m0, m2
-    movh       %1, m1
-    punpckhdq  m1, m1
-    movh       %2, m1
-    movh       %3, m0
-    punpckhdq  m0, m0
-    movh       %4, m0
-
-    punpckhdq  m3, m3
-    punpcklbw  m4, m5
-    punpcklbw  m6, m3
-    punpcklwd  m5, m4, m6
-    punpckhwd  m4, m6
-    movh       %5, m5
-    punpckhdq  m5, m5
-    movh       %6, m5
-    movh       %7, m4
-    punpckhdq  m4, m4
-    movh       %8, m4
+; in: 2 rows of 8 words in %1, %2
+; out: 8 rows of 4 bytes in %3..%10
+%macro TRANSPOSE8x4B_STORE 10
+    punpcklwd   m6, %1, %2
+    movd        %3, m6
+    pshufd      m7, m6, 00110001b
+    punpckhqdq  m6, m6
+    movd        %4, m7
+    punpckhqdq  m7, m7
+    punpckhwd   %1, %2
+    movd        %5, m6
+    movd        %6, m7
+    pshufd      m6, %1, 00110001b
+    movd        %7, %1
+    punpckhqdq  %1, %1
+    movd        %8, m6
+    punpckhqdq  m6, m6
+    movd        %9, %1
+    movd       %10, m6
 %endmacro
 
 %macro SBUTTERFLY3 4
@@ -325,25 +315,30 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 %endif
     call   deblock_v_luma_8
 
-    ; transpose 16x4 -> original space  (only the middle 4 rows were changed by the filter)
     add    r6, 2
     add    r5, 2
-    movq   m0, [pix_tmp+0x18]
-    movq   m1, [pix_tmp+0x28]
-    movq   m2, [pix_tmp+0x38]
-    movq   m3, [pix_tmp+0x48]
-    TRANSPOSE8x4B_STORE  PASS8ROWS(r6, r5, r7, r8)
 
+    INIT_XMM cpuname
+
+    ; transpose 16x4 (only the middle 4 rows were changed by the filter)
+    mova       m0, [pix_tmp+0x10]
+    mova       m1, [pix_tmp+0x20]
+    mova       m2, [pix_tmp+0x30]
+    mova       m3, [pix_tmp+0x40]
+
+    punpckhbw  m4, m0, m1
+    punpckhbw  m5, m2, m3
+
+    TRANSPOSE8x4B_STORE m4, m5, PASS8ROWS(r6, r5, r7, r8)
+
+    punpcklbw  m0, m1
+    punpcklbw  m2, m3
     shl    r7,  3
     sub    r6,  r7
     sub    r5,  r7
     shr    r7,  3
-    movq   m0, [pix_tmp+0x10]
-    movq   m1, [pix_tmp+0x20]
-    movq   m2, [pix_tmp+0x30]
-    movq   m3, [pix_tmp+0x40]
-    TRANSPOSE8x4B_STORE  PASS8ROWS(r6, r5, r7, r8)
 
+    TRANSPOSE8x4B_STORE m0, m2, PASS8ROWS(r6, r5, r7, r8)
     RET
 %endmacro
 
@@ -524,24 +519,28 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
     call   deblock_v_luma_8
     ADD    esp, 20
 
-    ; transpose 16x4 -> original space  (only the middle 4 rows were changed by the filter)
-    mov    r0, r0mp
-    sub    r0, 2
+    INIT_XMM cpuname
 
-    movq   m0, [pix_tmp+0x10]
-    movq   m1, [pix_tmp+0x20]
-    lea    r1, [r0+r4]
-    movq   m2, [pix_tmp+0x30]
-    movq   m3, [pix_tmp+0x40]
-    TRANSPOSE8x4B_STORE  PASS8ROWS(r0, r1, r3, r4)
+    ; transpose 16x4 (only the middle 4 rows were changed by the filter)
+    mova       m0, [pix_tmp+0x10]
+    mova       m1, [pix_tmp+0x20]
+    mova       m2, [pix_tmp+0x30]
+    mova       m3, [pix_tmp+0x40]
 
+    mov        r0, r0mp
+    punpcklbw  m4, m0, m1
+    sub        r0, 2
+    punpcklbw  m5, m2, m3
+    lea        r1, [r0+r4]
+
+    TRANSPOSE8x4B_STORE m4, m5, PASS8ROWS(r0, r1, r3, r4)
+
+    punpckhbw   m0, m1
     lea    r0, [r0+r3*8]
+    punpckhbw   m2, m3
     lea    r1, [r1+r3*8]
-    movq   m0, [pix_tmp+0x18]
-    movq   m1, [pix_tmp+0x28]
-    movq   m2, [pix_tmp+0x38]
-    movq   m3, [pix_tmp+0x48]
-    TRANSPOSE8x4B_STORE  PASS8ROWS(r0, r1, r3, r4)
+
+    TRANSPOSE8x4B_STORE m0, m2, PASS8ROWS(r0, r1, r3, r4)
 
     RET
 %endmacro ; DEBLOCK_LUMA
-- 
2.52.0


>From 94dc8e969e7314dcaa5f1e6fe79bf19aeaef24e5 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 16:27:33 +0100
Subject: [PATCH 11/13] avcodec/x86/h264_deblock: Avoid reload

Old benchmarks:
h264_h_loop_filter_luma_8bpp_c:                         60.0 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2:                      65.4 ( 0.92x)
h264_h_loop_filter_luma_8bpp_avx:                       65.3 ( 0.92x)

New benchmarks:
h264_h_loop_filter_luma_8bpp_c:                         60.4 ( 1.00x)
h264_h_loop_filter_luma_8bpp_sse2:                      62.0 ( 0.97x)
h264_h_loop_filter_luma_8bpp_avx:                       61.7 ( 0.98x)

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index c81fb36494..d4c7ce73ee 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -322,8 +322,7 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 
     ; transpose 16x4 (only the middle 4 rows were changed by the filter)
     mova       m0, [pix_tmp+0x10]
-    mova       m1, [pix_tmp+0x20]
-    mova       m2, [pix_tmp+0x30]
+    ; the two middle rows are still in the proper registers
     mova       m3, [pix_tmp+0x40]
 
     punpckhbw  m4, m0, m1
@@ -523,8 +522,7 @@ cglobal deblock_h_luma_8, 0,5,8,0x60+12
 
     ; transpose 16x4 (only the middle 4 rows were changed by the filter)
     mova       m0, [pix_tmp+0x10]
-    mova       m1, [pix_tmp+0x20]
-    mova       m2, [pix_tmp+0x30]
+    ; the two middle rows are still in the proper registers
     mova       m3, [pix_tmp+0x40]
 
     mov        r0, r0mp
-- 
2.52.0


>From 16a2a9244ebc0c4a0d1f121332d93cf5353bda8c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Thu, 8 Jan 2026 15:20:18 +0100
Subject: [PATCH 12/13] avcodec/x86/h264_deblock: Don't sign-extend stride

Unnecessary (and wrong) since d5d699ab6e6f8a8290748d107416fd5c19757a1b.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/h264_deblock.asm | 39 +++++++++++++++------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d4c7ce73ee..d4a4033fc9 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -238,7 +238,7 @@ cextern pb_3
 
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
-; void ff_deblock_v_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_v_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
 ;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_LUMA 0
@@ -284,15 +284,15 @@ cglobal deblock_v_luma_8, 5,5,10, pix_, stride_, alpha_, beta_, base3_
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
 ;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
-cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
-    movsxd r7,  r1d
-    lea    r8,  [r7+r7*2]
+cglobal deblock_h_luma_8, 5,9,8,0x60+16*WIN64
+    lea    r8,  [r1+r1*2]
     lea    r6,  [r0-4]
     lea    r5,  [r0-4+r8]
+    mov    r7,  r1
 %if WIN64
     %define pix_tmp rsp+0x30 ; shadow space + r4
 %else
@@ -300,10 +300,10 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 %endif
 
     ; transpose 6x16 -> tmp space
-    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r7, r8), pix_tmp
-    lea    r6, [r6+r7*8]
-    lea    r5, [r5+r7*8]
-    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r7, r8), pix_tmp+8
+    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r1, r8), pix_tmp
+    lea    r6, [r6+r1*8]
+    lea    r5, [r5+r1*8]
+    TRANSPOSE6x8_MEM  PASS8ROWS(r6, r5, r1, r8), pix_tmp+8
 
     ; vertical filter
     ; alpha, beta, tc0 are still in r2d, r3d, r4
@@ -344,7 +344,6 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 %macro DEBLOCK_H_LUMA_MBAFF 0
 
 cglobal deblock_h_luma_mbaff_8, 5, 9, 10, 8*16, pix_, stride_, alpha_, beta_, tc0_, base3_, stride3_
-    movsxd stride_q,   stride_d
     dec    alpha_d
     dec    beta_d
     mov    base3_q,    pix_q
@@ -490,7 +489,7 @@ cglobal deblock_v_luma_8, 5,5,8,2*%1
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
 ;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
@@ -687,7 +686,7 @@ DEBLOCK_LUMA 16
 %endif
 
 ;-----------------------------------------------------------------------------
-; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
+; void ff_deblock_v_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 ;-----------------------------------------------------------------------------
 %if WIN64
 cglobal deblock_v_luma_intra_8, 4,6,16,0x10
@@ -744,13 +743,13 @@ cglobal deblock_v_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
 INIT_MMX cpuname
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
-; void ff_deblock_h_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
+; void ff_deblock_h_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_8, 4,9,0,0x80
-    movsxd r7,  r1d
-    lea    r8,  [r7*3]
+    lea    r8,  [r1*3]
     lea    r6,  [r0-4]
     lea    r5,  [r0-4+r8]
+    mov    r7,  r1
 %if WIN64
     %define pix_tmp rsp+0x20 ; shadow space
 %else
@@ -758,10 +757,10 @@ cglobal deblock_h_luma_intra_8, 4,9,0,0x80
 %endif
 
     ; transpose 8x16 -> tmp space
-    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
-    lea    r6, [r6+r7*8]
-    lea    r5, [r5+r7*8]
-    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
+    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
+    lea    r6, [r6+r1*8]
+    lea    r5, [r5+r1*8]
+    TRANSPOSE8x8_MEM  PASS8ROWS(r6, r5, r1, r8), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
 
     lea    r0,  [pix_tmp+0x40]
     mov    r1,  0x10
@@ -899,7 +898,6 @@ DEBLOCK_LUMA_INTRA v
 %endmacro
 
 %macro CHROMA_V_START_XMM 1
-    movsxdifnidn stride_q, stride_d
     dec alpha_d
     dec beta_d
     mov %1, pix_q
@@ -908,7 +906,6 @@ DEBLOCK_LUMA_INTRA v
 %endmacro
 
 %macro CHROMA_H_START_XMM 2
-    movsxdifnidn stride_q, stride_d
     dec alpha_d
     dec beta_d
     lea %2, [3*stride_q]
-- 
2.52.0


>From 78ac51eebb725bfcc992f923e8b895d47879a0b2 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 7 Jan 2026 20:35:15 +0100
Subject: [PATCH 13/13] avcodec/h264dsp: Remove redundant h264 from H264DSPCtx
 member names

These names are a remnant of dsputil when all the DSP functions
from all codecs were part of DSPContext.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/aarch64/h264dsp_init_aarch64.c     |  72 +++----
 libavcodec/arm/h264dsp_init_arm.c             |  38 ++--
 libavcodec/h264_loopfilter.c                  |  30 +--
 libavcodec/h264_mb.c                          |  54 +++---
 libavcodec/h264_mb_template.c                 |  28 +--
 libavcodec/h264dsp.c                          |  78 ++++----
 libavcodec/h264dsp.h                          | 114 +++++------
 libavcodec/loongarch/h264dsp_init_loongarch.c |  86 ++++-----
 libavcodec/mips/h264dsp_init_mips.c           | 108 +++++------
 libavcodec/ppc/h264dsp.c                      |  28 +--
 libavcodec/riscv/h264dsp_init.c               |  88 ++++-----
 libavcodec/svq3.c                             |   8 +-
 libavcodec/x86/h264dsp_init.c                 | 178 +++++++++---------
 tests/checkasm/h264dsp.c                      |  62 +++---
 14 files changed, 486 insertions(+), 486 deletions(-)

diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c 
b/libavcodec/aarch64/h264dsp_init_aarch64.c
index 6bf3ecb8a1..c684574320 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -112,55 +112,55 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, 
const int bit_depth,
     int cpu_flags = av_get_cpu_flags();
 
     if (have_neon(cpu_flags) && bit_depth == 8) {
-        c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
-        c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
-        c->h264_v_loop_filter_luma_intra= 
ff_h264_v_loop_filter_luma_intra_neon;
-        c->h264_h_loop_filter_luma_intra= 
ff_h264_h_loop_filter_luma_intra_neon;
+        c->v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
+        c->h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+        c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
+        c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
 
-        c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-        c->h264_v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon;
+        c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+        c->v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon;
 
         if (chroma_format_idc <= 1) {
-            c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
-            c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
-            c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+            c->h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
+            c->h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
         } else {
-            c->h264_h_loop_filter_chroma = 
ff_h264_h_loop_filter_chroma422_neon;
-            c->h264_h_loop_filter_chroma_mbaff = 
ff_h264_h_loop_filter_chroma_neon;
-            c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma422_intra_neon;
-            c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
+            c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
+            c->h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma422_intra_neon;
+            c->h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_intra_neon;
         }
 
-        c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
-        c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
-        c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
+        c->weight_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
+        c->weight_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
+        c->weight_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
 
-        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
-        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
-        c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+        c->biweight_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+        c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+        c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
 
-        c->h264_idct_add        = ff_h264_idct_add_neon;
-        c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
-        c->h264_idct_add16      = ff_h264_idct_add16_neon;
-        c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+        c->idct_add        = ff_h264_idct_add_neon;
+        c->idct_dc_add     = ff_h264_idct_dc_add_neon;
+        c->idct_add16      = ff_h264_idct_add16_neon;
+        c->idct_add16intra = ff_h264_idct_add16intra_neon;
         if (chroma_format_idc <= 1)
-            c->h264_idct_add8   = ff_h264_idct_add8_neon;
-        c->h264_idct8_add       = ff_h264_idct8_add_neon;
-        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
-        c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+            c->idct_add8   = ff_h264_idct_add8_neon;
+        c->idct8_add       = ff_h264_idct8_add_neon;
+        c->idct8_dc_add    = ff_h264_idct8_dc_add_neon;
+        c->idct8_add4      = ff_h264_idct8_add4_neon;
     } else if (have_neon(cpu_flags) && bit_depth == 10) {
-        c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon_10;
-        c->h264_v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon_10;
+        c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon_10;
+        c->v_loop_filter_chroma_intra = 
ff_h264_v_loop_filter_chroma_intra_neon_10;
 
         if (chroma_format_idc <= 1) {
-            c->h264_h_loop_filter_chroma = 
ff_h264_h_loop_filter_chroma_neon_10;
-            c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon_10;
-            c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon_10;
+            c->h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma_intra_neon_10;
+            c->h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_mbaff_intra_neon_10;
         } else {
-            c->h264_h_loop_filter_chroma = 
ff_h264_h_loop_filter_chroma422_neon_10;
-            c->h264_h_loop_filter_chroma_mbaff = 
ff_h264_h_loop_filter_chroma_neon_10;
-            c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma422_intra_neon_10;
-            c->h264_h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_intra_neon_10;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon_10;
+            c->h_loop_filter_chroma_mbaff = 
ff_h264_h_loop_filter_chroma_neon_10;
+            c->h_loop_filter_chroma_intra = 
ff_h264_h_loop_filter_chroma422_intra_neon_10;
+            c->h_loop_filter_chroma_mbaff_intra = 
ff_h264_h_loop_filter_chroma_intra_neon_10;
         }
     }
 }
diff --git a/libavcodec/arm/h264dsp_init_arm.c 
b/libavcodec/arm/h264dsp_init_arm.c
index 13d499fda2..050ceb90bf 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -76,32 +76,32 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, 
const int bit_depth,
 {
 #if HAVE_NEON
     if (bit_depth == 8) {
-        c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
-        c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
-        c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+        c->v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
+        c->h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+        c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
 
         if (chroma_format_idc <= 1)
-            c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
         else
-            c->h264_h_loop_filter_chroma = 
ff_h264_h_loop_filter_chroma422_neon;
+            c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
 
-        c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
-        c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
-        c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
+        c->weight_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
+        c->weight_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
+        c->weight_pixels_tab[2] = ff_weight_h264_pixels_4_neon;
 
-        c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
-        c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
-        c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
+        c->biweight_pixels_tab[0] = ff_biweight_h264_pixels_16_neon;
+        c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
+        c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
 
-        c->h264_idct_add        = ff_h264_idct_add_neon;
-        c->h264_idct_dc_add     = ff_h264_idct_dc_add_neon;
-        c->h264_idct_add16      = ff_h264_idct_add16_neon;
-        c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+        c->idct_add        = ff_h264_idct_add_neon;
+        c->idct_dc_add     = ff_h264_idct_dc_add_neon;
+        c->idct_add16      = ff_h264_idct_add16_neon;
+        c->idct_add16intra = ff_h264_idct_add16intra_neon;
         if (chroma_format_idc <= 1)
-            c->h264_idct_add8   = ff_h264_idct_add8_neon;
-        c->h264_idct8_add       = ff_h264_idct8_add_neon;
-        c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
-        c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
+            c->idct_add8   = ff_h264_idct_add8_neon;
+        c->idct8_add       = ff_h264_idct8_add_neon;
+        c->idct8_dc_add    = ff_h264_idct8_dc_add_neon;
+        c->idct8_add4      = ff_h264_idct8_add4_neon;
     }
 #endif // HAVE_NEON
 }
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index c164a289b7..e2fc886bbf 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -113,9 +113,9 @@ static av_always_inline void filter_mb_edgev(uint8_t *pix, 
int stride,
         tc[1] = tc0_table[index_a][bS[1]];
         tc[2] = tc0_table[index_a][bS[2]];
         tc[3] = tc0_table[index_a][bS[3]];
-        h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h_loop_filter_luma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -135,9 +135,9 @@ static av_always_inline void filter_mb_edgecv(uint8_t *pix, 
int stride,
         tc[1] = tc0_table[index_a][bS[1]]+1;
         tc[2] = tc0_table[index_a][bS[2]]+1;
         tc[3] = tc0_table[index_a][bS[3]]+1;
-        h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
+        h->h264dsp.h_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        h->h264dsp.h_loop_filter_chroma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -158,9 +158,9 @@ static av_always_inline void filter_mb_mbaff_edgev(const 
H264Context *h, uint8_t
         tc[1] = tc0_table[index_a][bS[1*bsi]];
         tc[2] = tc0_table[index_a][bS[2*bsi]];
         tc[3] = tc0_table[index_a][bS[3*bsi]];
-        h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
+        h->h264dsp.h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, 
beta);
+        h->h264dsp.h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -181,9 +181,9 @@ static av_always_inline void filter_mb_mbaff_edgecv(const 
H264Context *h,
         tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
         tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
         tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
-        h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, 
tc);
+        h->h264dsp.h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, 
beta);
+        h->h264dsp.h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -203,9 +203,9 @@ static av_always_inline void filter_mb_edgeh(uint8_t *pix, 
int stride,
         tc[1] = tc0_table[index_a][bS[1]];
         tc[2] = tc0_table[index_a][bS[2]];
         tc[3] = tc0_table[index_a][bS[3]];
-        h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
+        h->h264dsp.v_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
+        h->h264dsp.v_loop_filter_luma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -225,9 +225,9 @@ static av_always_inline void filter_mb_edgech(uint8_t *pix, 
int stride,
         tc[1] = tc0_table[index_a][bS[1]]+1;
         tc[2] = tc0_table[index_a][bS[2]]+1;
         tc[3] = tc0_table[index_a][bS[3]]+1;
-        h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
+        h->h264dsp.v_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
-        h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
+        h->h264dsp.v_loop_filter_chroma_intra(pix, stride, alpha, beta);
     }
 }
 
@@ -368,8 +368,8 @@ static av_always_inline void 
h264_filter_mb_fast_internal(const H264Context *h,
             int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // 
(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & 
(MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
             int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
             edges = 4 - 3*((mb_type>>3) & !(sl->cbp & 15)); //(mb_type & 
MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
-            h->h264dsp.h264_loop_filter_strength(bS, sl->non_zero_count_cache, 
sl->ref_cache, sl->mv_cache,
-                                                 sl->list_count==2, edges, 
step, mask_edge0, mask_edge1, FIELD_PICTURE(h));
+            h->h264dsp.loop_filter_strength(bS, sl->non_zero_count_cache, 
sl->ref_cache, sl->mv_cache,
+                                            sl->list_count==2, edges, step, 
mask_edge0, mask_edge1, FIELD_PICTURE(h));
         }
         if( IS_INTRA(left_type) )
             AV_WN64A(bS[0][0], 0x0004000400040004ULL);
@@ -419,7 +419,7 @@ void ff_h264_filter_mb_fast(const H264Context *h, 
H264SliceContext *sl,
                             unsigned int linesize, unsigned int uvlinesize)
 {
     av_assert2(!FRAME_MBAFF(h));
-    if(!h->h264dsp.h264_loop_filter_strength || h->ps.pps->chroma_qp_diff) {
+    if (!h->h264dsp.loop_filter_strength || h->ps.pps->chroma_qp_diff) {
         ff_h264_filter_mb(h, sl, mb_x, mb_y, img_y, img_cb, img_cr, linesize, 
uvlinesize);
         return;
     }
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 0d6562b583..67fa980de3 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -629,10 +629,10 @@ static av_always_inline void 
hl_decode_mb_predict_luma(const H264Context *h,
         if (IS_8x8DCT(mb_type)) {
             if (transform_bypass) {
                 idct_dc_add =
-                idct_add    = h->h264dsp.h264_add_pixels8_clear;
+                idct_add    = h->h264dsp.add_pixels8_clear;
             } else {
-                idct_dc_add = h->h264dsp.h264_idct8_dc_add;
-                idct_add    = h->h264dsp.h264_idct8_add;
+                idct_dc_add = h->h264dsp.idct8_dc_add;
+                idct_add    = h->h264dsp.idct8_add;
             }
             for (i = 0; i < 16; i += 4) {
                 uint8_t *const ptr = dest_y + block_offset[i];
@@ -658,11 +658,11 @@ static av_always_inline void 
hl_decode_mb_predict_luma(const H264Context *h,
             }
         } else {
             if (transform_bypass) {
-                idct_dc_add  =
-                idct_add     = h->h264dsp.h264_add_pixels4_clear;
+                idct_dc_add =
+                idct_add    = h->h264dsp.add_pixels4_clear;
             } else {
-                idct_dc_add = h->h264dsp.h264_idct_dc_add;
-                idct_add    = h->h264dsp.h264_idct_add;
+                idct_dc_add = h->h264dsp.idct_dc_add;
+                idct_add    = h->h264dsp.idct_add;
             }
             for (i = 0; i < 16; i++) {
                 uint8_t *const ptr = dest_y + block_offset[i];
@@ -705,9 +705,9 @@ static av_always_inline void 
hl_decode_mb_predict_luma(const H264Context *h,
         h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
         if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
             if (!transform_bypass)
-                h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << 
pixel_shift),
-                                                     sl->mb_luma_dc[p],
-                                                     
h->ps.pps->dequant4_coeff[p][qscale][0]);
+                h->h264dsp.luma_dc_dequant_idct(sl->mb + (p * 256 << 
pixel_shift),
+                                                sl->mb_luma_dc[p],
+                                                
h->ps.pps->dequant4_coeff[p][qscale][0]);
             else {
                 static const uint8_t dc_mapping[16] = {
                      0 * 16,  1 * 16,  4 * 16,  5 * 16,
@@ -749,21 +749,21 @@ static av_always_inline void hl_decode_mb_idct_luma(const 
H264Context *h, H264Sl
                     for (i = 0; i < 16; i++)
                         if (sl->non_zero_count_cache[scan8[i + p * 16]] ||
                             dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
-                            h->h264dsp.h264_add_pixels4_clear(dest_y + 
block_offset[i],
-                                                              sl->mb + (i * 16 
+ p * 256 << pixel_shift),
-                                                              linesize);
+                            h->h264dsp.add_pixels4_clear(dest_y + 
block_offset[i],
+                                                         sl->mb + (i * 16 + p 
* 256 << pixel_shift),
+                                                         linesize);
                 }
             } else {
-                h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
-                                                sl->mb + (p * 256 << 
pixel_shift),
-                                                linesize,
-                                                sl->non_zero_count_cache + p * 
5 * 8);
+                h->h264dsp.idct_add16intra(dest_y, block_offset,
+                                           sl->mb + (p * 256 << pixel_shift),
+                                           linesize,
+                                           sl->non_zero_count_cache + p * 5 * 
8);
             }
         } else if (sl->cbp & 15) {
             if (transform_bypass) {
                 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
-                idct_add = IS_8x8DCT(mb_type) ? 
h->h264dsp.h264_add_pixels8_clear
-                    : h->h264dsp.h264_add_pixels4_clear;
+                idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.add_pixels8_clear
+                    : h->h264dsp.add_pixels4_clear;
                 for (i = 0; i < 16; i += di)
                     if (sl->non_zero_count_cache[scan8[i + p * 16]])
                         idct_add(dest_y + block_offset[i],
@@ -771,15 +771,15 @@ static av_always_inline void hl_decode_mb_idct_luma(const 
H264Context *h, H264Sl
                                  linesize);
             } else {
                 if (IS_8x8DCT(mb_type))
-                    h->h264dsp.h264_idct8_add4(dest_y, block_offset,
-                                               sl->mb + (p * 256 << 
pixel_shift),
-                                               linesize,
-                                               sl->non_zero_count_cache + p * 
5 * 8);
+                    h->h264dsp.idct8_add4(dest_y, block_offset,
+                                          sl->mb + (p * 256 << pixel_shift),
+                                          linesize,
+                                          sl->non_zero_count_cache + p * 5 * 
8);
                 else
-                    h->h264dsp.h264_idct_add16(dest_y, block_offset,
-                                               sl->mb + (p * 256 << 
pixel_shift),
-                                               linesize,
-                                               sl->non_zero_count_cache + p * 
5 * 8);
+                    h->h264dsp.idct_add16(dest_y, block_offset,
+                                          sl->mb + (p * 256 << pixel_shift),
+                                          linesize,
+                                          sl->non_zero_count_cache + p * 5 * 
8);
             }
         }
     }
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index d5ea26a6e3..ee8c81a10c 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -174,16 +174,16 @@ static av_noinline void FUNC(hl_decode_mb)(const 
H264Context *h, H264SliceContex
                               h->h264chroma.put_h264_chroma_pixels_tab,
                               h->h264qpel.avg_h264_qpel_pixels_tab,
                               h->h264chroma.avg_h264_chroma_pixels_tab,
-                              h->h264dsp.weight_h264_pixels_tab,
-                              h->h264dsp.biweight_h264_pixels_tab);
+                              h->h264dsp.weight_pixels_tab,
+                              h->h264dsp.biweight_pixels_tab);
             } else {
                 FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr,
                               h->h264qpel.put_h264_qpel_pixels_tab,
                               h->h264chroma.put_h264_chroma_pixels_tab,
                               h->h264qpel.avg_h264_qpel_pixels_tab,
                               h->h264chroma.avg_h264_chroma_pixels_tab,
-                              h->h264dsp.weight_h264_pixels_tab,
-                              h->h264dsp.biweight_h264_pixels_tab);
+                              h->h264dsp.weight_pixels_tab,
+                              h->h264dsp.biweight_pixels_tab);
             }
         }
 
@@ -206,7 +206,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
                                                             sl->mb + (16 * 16 
* 2 << PIXEL_SHIFT),
                                                             uvlinesize);
                 } else {
-                    idct_add = h->h264dsp.h264_add_pixels4_clear;
+                    idct_add = h->h264dsp.add_pixels4_clear;
                     for (j = 1; j < 3; j++) {
                         for (i = j * 16; i < j * 16 + 4; i++)
                             if (sl->non_zero_count_cache[scan8[i]] ||
@@ -234,14 +234,14 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
                     qp[1] = sl->chroma_qp[1];
                 }
                 if (sl->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
-                    h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + (16 * 16 * 
1 << PIXEL_SHIFT),
-                                                           
h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
+                    h->h264dsp.chroma_dc_dequant_idct(sl->mb + (16 * 16 * 1 << 
PIXEL_SHIFT),
+                                                      
h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
                 if (sl->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
-                    h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + (16 * 16 * 
2 << PIXEL_SHIFT),
-                                                           
h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
-                h->h264dsp.h264_idct_add8(dest, block_offset,
-                                          sl->mb, uvlinesize,
-                                          sl->non_zero_count_cache);
+                    h->h264dsp.chroma_dc_dequant_idct(sl->mb + (16 * 16 * 2 << 
PIXEL_SHIFT),
+                                                      
h->ps.pps->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
+                h->h264dsp.idct_add8(dest, block_offset,
+                                     sl->mb, uvlinesize,
+                                     sl->non_zero_count_cache);
             }
         }
     }
@@ -341,8 +341,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(const H264Context *h, H264SliceCo
                       h->h264chroma.put_h264_chroma_pixels_tab,
                       h->h264qpel.avg_h264_qpel_pixels_tab,
                       h->h264chroma.avg_h264_chroma_pixels_tab,
-                      h->h264dsp.weight_h264_pixels_tab,
-                      h->h264dsp.biweight_h264_pixels_tab);
+                      h->h264dsp.weight_pixels_tab,
+                      h->h264dsp.biweight_pixels_tab);
         }
 
         for (p = 0; p < plane_count; p++)
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index f4c5238372..f612353596 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -70,13 +70,13 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
 #define FUNC(a, depth) a ## _ ## depth ## _c
 
 #define SET_PIXSIZE_FUNCS(depth) \
-    c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+    c->luma_dc_dequant_idct = FUNC(ff_h264_luma_dc_dequant_idct, depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, 
depth);\
+        c->chroma_dc_dequant_idct = FUNC(ff_h264_chroma_dc_dequant_idct, 
depth);\
     else\
-        c->h264_chroma_dc_dequant_idct= 
FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\
-    c->h264_add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
-    c->h264_add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)
+        c->chroma_dc_dequant_idct = FUNC(ff_h264_chroma422_dc_dequant_idct, 
depth);\
+    c->add_pixels4_clear = FUNC(ff_h264_add_pixels4, depth);\
+    c->add_pixels8_clear = FUNC(ff_h264_add_pixels8, depth)
 
     if (bit_depth > 8 && bit_depth <= 16) {
         SET_PIXSIZE_FUNCS(16);
@@ -85,52 +85,52 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
     }
 
 #define H264_DSP(depth) \
-    c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
-    c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
-    c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
-    c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
-    c->h264_idct_add16     = FUNC(ff_h264_idct_add16, depth);\
-    c->h264_idct8_add4     = FUNC(ff_h264_idct8_add4, depth);\
+    c->idct_add     = FUNC(ff_h264_idct_add, depth);\
+    c->idct8_add    = FUNC(ff_h264_idct8_add, depth);\
+    c->idct_dc_add  = FUNC(ff_h264_idct_dc_add, depth);\
+    c->idct8_dc_add = FUNC(ff_h264_idct8_dc_add, depth);\
+    c->idct_add16   = FUNC(ff_h264_idct_add16, depth);\
+    c->idct8_add4   = FUNC(ff_h264_idct8_add4, depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_idct_add8  = FUNC(ff_h264_idct_add8, depth);\
+        c->idct_add8  = FUNC(ff_h264_idct_add8, depth);\
     else\
-        c->h264_idct_add8  = FUNC(ff_h264_idct_add8_422, depth);\
-    c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+        c->idct_add8  = FUNC(ff_h264_idct_add8_422, depth);\
+    c->idct_add16intra = FUNC(ff_h264_idct_add16intra, depth);\
 \
-    c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16, depth);\
-    c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels8, depth);\
-    c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels4, depth);\
-    c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels2, depth);\
-    c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16, depth);\
-    c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels8, depth);\
-    c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels4, depth);\
-    c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels2, depth);\
+    c->weight_pixels_tab[0]   = FUNC(weight_h264_pixels16, depth);\
+    c->weight_pixels_tab[1]   = FUNC(weight_h264_pixels8, depth);\
+    c->weight_pixels_tab[2]   = FUNC(weight_h264_pixels4, depth);\
+    c->weight_pixels_tab[3]   = FUNC(weight_h264_pixels2, depth);\
+    c->biweight_pixels_tab[0] = FUNC(biweight_h264_pixels16, depth);\
+    c->biweight_pixels_tab[1] = FUNC(biweight_h264_pixels8, depth);\
+    c->biweight_pixels_tab[2] = FUNC(biweight_h264_pixels4, depth);\
+    c->biweight_pixels_tab[3] = FUNC(biweight_h264_pixels2, depth);\
 \
-    c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
-    c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
-    c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, 
depth);\
-    c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, 
depth);\
-    c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, 
depth);\
-    c->h264_h_loop_filter_luma_mbaff_intra= 
FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
-    c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+    c->v_loop_filter_luma       = FUNC(h264_v_loop_filter_luma, depth);\
+    c->h_loop_filter_luma       = FUNC(h264_h_loop_filter_luma, depth);\
+    c->h_loop_filter_luma_mbaff = FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+    c->v_loop_filter_luma_intra = FUNC(h264_v_loop_filter_luma_intra, depth);\
+    c->h_loop_filter_luma_intra = FUNC(h264_h_loop_filter_luma_intra, depth);\
+    c->h_loop_filter_luma_mbaff_intra = 
FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+    c->v_loop_filter_chroma = FUNC(h264_v_loop_filter_chroma, depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+        c->h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma, depth);\
     else\
-        c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, 
depth);\
+        c->h_loop_filter_chroma = FUNC(h264_h_loop_filter_chroma422, depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_h_loop_filter_chroma_mbaff= 
FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+        c->h_loop_filter_chroma_mbaff = FUNC(h264_h_loop_filter_chroma_mbaff, 
depth);\
     else\
-        c->h264_h_loop_filter_chroma_mbaff= 
FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
-    c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, 
depth);\
+        c->h_loop_filter_chroma_mbaff = 
FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\
+    c->v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, 
depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_h_loop_filter_chroma_intra= 
FUNC(h264_h_loop_filter_chroma_intra, depth);\
+        c->h_loop_filter_chroma_intra = FUNC(h264_h_loop_filter_chroma_intra, 
depth);\
     else\
-        c->h264_h_loop_filter_chroma_intra= 
FUNC(h264_h_loop_filter_chroma422_intra, depth);\
+        c->h_loop_filter_chroma_intra = 
FUNC(h264_h_loop_filter_chroma422_intra, depth);\
     if (chroma_format_idc <= 1)\
-        c->h264_h_loop_filter_chroma_mbaff_intra= 
FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
+        c->h_loop_filter_chroma_mbaff_intra = 
FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\
     else\
-        c->h264_h_loop_filter_chroma_mbaff_intra= 
FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
-    c->h264_loop_filter_strength= NULL;
+        c->h_loop_filter_chroma_mbaff_intra = 
FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\
+    c->loop_filter_strength = NULL;
 
     switch (bit_depth) {
     case 9:
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 4a9cb1568d..f40d324a4e 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -41,71 +41,71 @@ typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
  */
 typedef struct H264DSPContext {
     /* weighted MC */
-    h264_weight_func weight_h264_pixels_tab[4];
-    h264_biweight_func biweight_h264_pixels_tab[4];
+    h264_weight_func     weight_pixels_tab[4];
+    h264_biweight_func biweight_pixels_tab[4];
 
     /* loop filter */
-    void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t 
stride,
-                                    int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t 
stride,
-                                    int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t 
stride,
-                                          int alpha, int beta, int8_t *tc0);
+    void (*v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
+                               int alpha, int beta, int8_t *tc0);
+    void (*h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
+                               int alpha, int beta, int8_t *tc0);
+    void (*h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t 
stride,
+                                     int alpha, int beta, int8_t *tc0);
     /* v/h_loop_filter_luma_intra: align 16 */
-    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
-                                          int alpha, int beta);
-    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
-                                          int alpha, int beta);
-    void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
-                                                ptrdiff_t stride, int alpha, 
int beta);
-    void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t 
stride,
-                                      int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t 
stride,
-                                      int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
-                                            ptrdiff_t stride, int alpha, int 
beta,
-                                            int8_t *tc0);
-    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
-                                            ptrdiff_t stride, int alpha, int 
beta);
-    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
-                                            ptrdiff_t stride, int alpha, int 
beta);
-    void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
-                                                  ptrdiff_t stride, int alpha, 
int beta);
-    // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
-    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
-                                      int8_t ref[2][40], int16_t mv[2][40][2],
-                                      int bidir, int edges, int step,
-                                      int mask_mv0, int mask_mv1, int field);
+    void (*v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+                                     int alpha, int beta);
+    void (*h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
+                                     int alpha, int beta);
+    void (*h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
+                                           ptrdiff_t stride, int alpha, int 
beta);
+    void (*v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride,
+                                 int alpha, int beta, int8_t *tc0);
+    void (*h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride,
+                                 int alpha, int beta, int8_t *tc0);
+    void (*h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
+                                       ptrdiff_t stride, int alpha, int beta,
+                                       int8_t *tc0);
+    void (*v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+                                       ptrdiff_t stride, int alpha, int beta);
+    void (*h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+                                       ptrdiff_t stride, int alpha, int beta);
+    void (*h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
+                                             ptrdiff_t stride, int alpha, int 
beta);
+    // loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
+    void (*loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
+                                 int8_t ref[2][40], int16_t mv[2][40][2],
+                                 int bidir, int edges, int step,
+                                 int mask_mv0, int mask_mv1, int field);
 
     /* IDCT */
-    void (*h264_idct_add)(uint8_t *dst /*align 4*/,
-                          int16_t *block /*align 16*/, int stride);
-    void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
-                           int16_t *block /*align 16*/, int stride);
-    void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
-                             int16_t *block /*align 16*/, int stride);
-    void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
-                              int16_t *block /*align 16*/, int stride);
+    void (*idct_add)(uint8_t *dst /*align 4*/,
+                     int16_t *block /*align 16*/, int stride);
+    void (*idct8_add)(uint8_t *dst /*align 8*/,
+                      int16_t *block /*align 16*/, int stride);
+    void (*idct_dc_add)(uint8_t *dst /*align 4*/,
+                        int16_t *block /*align 16*/, int stride);
+    void (*idct8_dc_add)(uint8_t *dst /*align 8*/,
+                         int16_t *block /*align 16*/, int stride);
 
-    void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
-                            int16_t *block /*align 16*/, int stride,
-                            const uint8_t nnzc[5 * 8]);
-    void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
-                            int16_t *block /*align 16*/, int stride,
-                            const uint8_t nnzc[5 * 8]);
-    void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
-                           int16_t *block /*align 16*/, int stride,
-                           const uint8_t nnzc[15 * 8]);
-    void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int 
*blockoffset,
-                                 int16_t *block /*align 16*/,
-                                 int stride, const uint8_t nnzc[5 * 8]);
-    void (*h264_luma_dc_dequant_idct)(int16_t *output,
-                                      int16_t *input /*align 16*/, int qmul);
-    void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
+    void (*idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                       int16_t *block /*align 16*/, int stride,
+                       const uint8_t nnzc[5 * 8]);
+    void (*idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                       int16_t *block /*align 16*/, int stride,
+                       const uint8_t nnzc[5 * 8]);
+    void (*idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
+                      int16_t *block /*align 16*/, int stride,
+                      const uint8_t nnzc[15 * 8]);
+    void (*idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
+                            int16_t *block /*align 16*/,
+                            int stride, const uint8_t nnzc[5 * 8]);
+    void (*luma_dc_dequant_idct)(int16_t *output,
+                                 int16_t *input /*align 16*/, int qmul);
+    void (*chroma_dc_dequant_idct)(int16_t *block, int qmul);
 
     /* bypass-transform */
-    void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
-    void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+    void (*add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+    void (*add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
 
     /**
      * Search buf from the start for up to size bytes. Return the index
diff --git a/libavcodec/loongarch/h264dsp_init_loongarch.c b/libavcodec/loongarch/h264dsp_init_loongarch.c
index b70fe696d2..745915d5c6 100644
--- a/libavcodec/loongarch/h264dsp_init_loongarch.c
+++ b/libavcodec/loongarch/h264dsp_init_loongarch.c
@@ -30,67 +30,67 @@ av_cold void ff_h264dsp_init_loongarch(H264DSPContext *c, const int bit_depth,
 
     if (have_lsx(cpu_flags)) {
         if (chroma_format_idc <= 1)
-            c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lsx;
+            c->loop_filter_strength = ff_h264_loop_filter_strength_lsx;
         if (bit_depth == 8) {
-            c->h264_idct_add     = ff_h264_idct_add_8_lsx;
-            c->h264_idct8_add    = ff_h264_idct8_add_8_lsx;
-            c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_lsx;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
+            c->idct_add     = ff_h264_idct_add_8_lsx;
+            c->idct8_add    = ff_h264_idct8_add_8_lsx;
+            c->idct_dc_add  = ff_h264_idct_dc_add_8_lsx;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
 
             if (chroma_format_idc <= 1) {
-                c->h264_idct_add8 = ff_h264_idct_add8_8_lsx;
-                c->h264_h_loop_filter_chroma = ff_h264_h_lpf_chroma_8_lsx;
-                c->h264_h_loop_filter_chroma_intra = 
ff_h264_h_lpf_chroma_intra_8_lsx;
+                c->idct_add8 = ff_h264_idct_add8_8_lsx;
+                c->h_loop_filter_chroma = ff_h264_h_lpf_chroma_8_lsx;
+                c->h_loop_filter_chroma_intra = 
ff_h264_h_lpf_chroma_intra_8_lsx;
             } else
-                c->h264_idct_add8 = ff_h264_idct_add8_422_8_lsx;
+                c->idct_add8 = ff_h264_idct_add8_422_8_lsx;
 
-            c->h264_idct_add16 = ff_h264_idct_add16_8_lsx;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_8_lsx;
-            c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_lsx;
-            c->h264_idct_add16intra = ff_h264_idct_add16_intra_8_lsx;
+            c->idct_add16 = ff_h264_idct_add16_8_lsx;
+            c->idct8_add4 = ff_h264_idct8_add4_8_lsx;
+            c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_lsx;
+            c->idct_add16intra = ff_h264_idct_add16_intra_8_lsx;
 
-            c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lsx;
-            c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lsx;
-            c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lsx;
-            c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lsx;
-            c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lsx;
-            c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lsx;
-            c->h264_v_loop_filter_chroma = ff_h264_v_lpf_chroma_8_lsx;
+            c->add_pixels4_clear = ff_h264_add_pixels4_8_lsx;
+            c->add_pixels8_clear = ff_h264_add_pixels8_8_lsx;
+            c->v_loop_filter_luma = ff_h264_v_lpf_luma_8_lsx;
+            c->h_loop_filter_luma = ff_h264_h_lpf_luma_8_lsx;
+            c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lsx;
+            c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lsx;
+            c->v_loop_filter_chroma = ff_h264_v_lpf_chroma_8_lsx;
 
-            c->h264_v_loop_filter_chroma_intra = 
ff_h264_v_lpf_chroma_intra_8_lsx;
+            c->v_loop_filter_chroma_intra = ff_h264_v_lpf_chroma_intra_8_lsx;
 
-            c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lsx;
-            c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lsx;
-            c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_lsx;
-            c->weight_h264_pixels_tab[0]   = ff_weight_h264_pixels16_8_lsx;
-            c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lsx;
-            c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels4_8_lsx;
-            c->h264_idct8_add    = ff_h264_idct8_add_8_lsx;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
+            c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_lsx;
+            c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_lsx;
+            c->biweight_pixels_tab[2] = ff_biweight_h264_pixels4_8_lsx;
+            c->weight_pixels_tab[0]   = ff_weight_h264_pixels16_8_lsx;
+            c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_lsx;
+            c->weight_pixels_tab[2] = ff_weight_h264_pixels4_8_lsx;
+            c->idct8_add    = ff_h264_idct8_add_8_lsx;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_8_lsx;
         }
     }
 #if HAVE_LASX
     if (have_lasx(cpu_flags)) {
         if (chroma_format_idc <= 1)
-            c->h264_loop_filter_strength = ff_h264_loop_filter_strength_lasx;
+            c->loop_filter_strength = ff_h264_loop_filter_strength_lasx;
         if (bit_depth == 8) {
-            c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_lasx;
-            c->h264_add_pixels8_clear = ff_h264_add_pixels8_8_lasx;
-            c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_8_lasx;
-            c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_8_lasx;
-            c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
-            c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
+            c->add_pixels4_clear = ff_h264_add_pixels4_8_lasx;
+            c->add_pixels8_clear = ff_h264_add_pixels8_8_lasx;
+            c->v_loop_filter_luma = ff_h264_v_lpf_luma_8_lasx;
+            c->h_loop_filter_luma = ff_h264_h_lpf_luma_8_lasx;
+            c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_8_lasx;
+            c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_8_lasx;
 
             /* Weighted MC */
-            c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_lasx;
-            c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_lasx;
+            c->weight_pixels_tab[0] = ff_weight_h264_pixels16_8_lasx;
+            c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_lasx;
 
-            c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_lasx;
-            c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_lasx;
+            c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_lasx;
+            c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_lasx;
 
-            c->h264_idct8_add    = ff_h264_idct8_add_8_lasx;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_lasx;
-            c->h264_idct8_add4   = ff_h264_idct8_add4_8_lasx;
+            c->idct8_add    = ff_h264_idct8_add_8_lasx;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_8_lasx;
+            c->idct8_add4   = ff_h264_idct8_add4_8_lasx;
         }
     }
 #endif // #if HAVE_LASX
diff --git a/libavcodec/mips/h264dsp_init_mips.c b/libavcodec/mips/h264dsp_init_mips.c
index 72f42895e8..4d8c3a7a59 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -30,101 +30,101 @@ av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
 
     if (have_mmi(cpu_flags)) {
         if (bit_depth == 8) {
-            c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
-            c->h264_idct_add = ff_h264_idct_add_8_mmi;
-            c->h264_idct8_add = ff_h264_idct8_add_8_mmi;
-            c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmi;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
-            c->h264_idct_add16 = ff_h264_idct_add16_8_mmi;
-            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmi;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmi;
+            c->add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
+            c->idct_add = ff_h264_idct_add_8_mmi;
+            c->idct8_add = ff_h264_idct8_add_8_mmi;
+            c->idct_dc_add = ff_h264_idct_dc_add_8_mmi;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
+            c->idct_add16 = ff_h264_idct_add16_8_mmi;
+            c->idct_add16intra = ff_h264_idct_add16intra_8_mmi;
+            c->idct8_add4 = ff_h264_idct8_add4_8_mmi;
 
             if (chroma_format_idc <= 1)
-                c->h264_idct_add8 = ff_h264_idct_add8_8_mmi;
+                c->idct_add8 = ff_h264_idct_add8_8_mmi;
             else
-                c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmi;
+                c->idct_add8 = ff_h264_idct_add8_422_8_mmi;
 
-            c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
+            c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
 
-            c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
-            c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
-            c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
+            c->weight_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
+            c->weight_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
+            c->weight_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
 
-            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
-            c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
-            c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
+            c->biweight_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
+            c->biweight_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
+            c->biweight_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
 
-            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmi;
-            c->h264_v_loop_filter_chroma_intra = 
ff_deblock_v_chroma_intra_8_mmi;
+            c->v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmi;
+            c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
 
             if (chroma_format_idc <= 1) {
-                c->h264_h_loop_filter_chroma =
+                c->h_loop_filter_chroma =
                     ff_deblock_h_chroma_8_mmi;
-                c->h264_h_loop_filter_chroma_intra =
+                c->h_loop_filter_chroma_intra =
                     ff_deblock_h_chroma_intra_8_mmi;
             }
 
-            c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
-            c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
+            c->v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
+            c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
+            c->h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
+            c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
         }
     }
 
     if (have_msa(cpu_flags)) {
         if (chroma_format_idc <= 1)
-            c->h264_loop_filter_strength = ff_h264_loop_filter_strength_msa;
+            c->loop_filter_strength = ff_h264_loop_filter_strength_msa;
         if (bit_depth == 8) {
-            c->h264_v_loop_filter_luma = ff_h264_v_lpf_luma_inter_msa;
-            c->h264_h_loop_filter_luma = ff_h264_h_lpf_luma_inter_msa;
-            c->h264_h_loop_filter_luma_mbaff =
+            c->v_loop_filter_luma = ff_h264_v_lpf_luma_inter_msa;
+            c->h_loop_filter_luma = ff_h264_h_lpf_luma_inter_msa;
+            c->h_loop_filter_luma_mbaff =
                 ff_h264_h_loop_filter_luma_mbaff_msa;
-            c->h264_v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_msa;
-            c->h264_h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_msa;
-            c->h264_h_loop_filter_luma_mbaff_intra =
+            c->v_loop_filter_luma_intra = ff_h264_v_lpf_luma_intra_msa;
+            c->h_loop_filter_luma_intra = ff_h264_h_lpf_luma_intra_msa;
+            c->h_loop_filter_luma_mbaff_intra =
                 ff_h264_h_loop_filter_luma_mbaff_intra_msa;
-            c->h264_v_loop_filter_chroma = ff_h264_v_lpf_chroma_inter_msa;
+            c->v_loop_filter_chroma = ff_h264_v_lpf_chroma_inter_msa;
 
             if (chroma_format_idc <= 1)
-                c->h264_h_loop_filter_chroma = ff_h264_h_lpf_chroma_inter_msa;
+                c->h_loop_filter_chroma = ff_h264_h_lpf_chroma_inter_msa;
             else
-                c->h264_h_loop_filter_chroma =
+                c->h_loop_filter_chroma =
                     ff_h264_h_loop_filter_chroma422_msa;
 
             if (chroma_format_idc > 1)
-                c->h264_h_loop_filter_chroma_mbaff =
+                c->h_loop_filter_chroma_mbaff =
                     ff_h264_h_loop_filter_chroma422_mbaff_msa;
 
-            c->h264_v_loop_filter_chroma_intra =
+            c->v_loop_filter_chroma_intra =
                 ff_h264_v_lpf_chroma_intra_msa;
 
             if (chroma_format_idc <= 1)
-                c->h264_h_loop_filter_chroma_intra =
+                c->h_loop_filter_chroma_intra =
                     ff_h264_h_lpf_chroma_intra_msa;
 
             /* Weighted MC */
-            c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16_8_msa;
-            c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels8_8_msa;
-            c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels4_8_msa;
+            c->weight_pixels_tab[0] = ff_weight_h264_pixels16_8_msa;
+            c->weight_pixels_tab[1] = ff_weight_h264_pixels8_8_msa;
+            c->weight_pixels_tab[2] = ff_weight_h264_pixels4_8_msa;
 
-            c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
-            c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
-            c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
+            c->biweight_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
+            c->biweight_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
+            c->biweight_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
 
-            c->h264_idct_add = ff_h264_idct_add_msa;
-            c->h264_idct8_add = ff_h264_idct8_addblk_msa;
-            c->h264_idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
-            c->h264_idct_add16 = ff_h264_idct_add16_msa;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_msa;
+            c->idct_add = ff_h264_idct_add_msa;
+            c->idct8_add = ff_h264_idct8_addblk_msa;
+            c->idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
+            c->idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
+            c->idct_add16 = ff_h264_idct_add16_msa;
+            c->idct8_add4 = ff_h264_idct8_add4_msa;
 
             if (chroma_format_idc <= 1)
-                c->h264_idct_add8 = ff_h264_idct_add8_msa;
+                c->idct_add8 = ff_h264_idct_add8_msa;
             else
-                c->h264_idct_add8 = ff_h264_idct_add8_422_msa;
+                c->idct_add8 = ff_h264_idct_add8_422_msa;
 
-            c->h264_idct_add16intra = ff_h264_idct_add16_intra_msa;
-            c->h264_luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
+            c->idct_add16intra = ff_h264_idct_add16_intra_msa;
+            c->luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
         }
     }
 }
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index 0650768d7b..22d3db64a1 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -793,22 +793,22 @@ av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
         return;
 
     if (bit_depth == 8) {
-        c->h264_idct_add = h264_idct_add_altivec;
+        c->idct_add = h264_idct_add_altivec;
         if (chroma_format_idc <= 1)
-            c->h264_idct_add8 = h264_idct_add8_altivec;
-        c->h264_idct_add16      = h264_idct_add16_altivec;
-        c->h264_idct_add16intra = h264_idct_add16intra_altivec;
-        c->h264_idct_dc_add= h264_idct_dc_add_altivec;
-        c->h264_idct8_dc_add = h264_idct8_dc_add_altivec;
-        c->h264_idct8_add    = h264_idct8_add_altivec;
-        c->h264_idct8_add4   = h264_idct8_add4_altivec;
-        c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
-        c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
+            c->idct_add8 = h264_idct_add8_altivec;
+        c->idct_add16      = h264_idct_add16_altivec;
+        c->idct_add16intra = h264_idct_add16intra_altivec;
+        c->idct_dc_add = h264_idct_dc_add_altivec;
+        c->idct8_dc_add = h264_idct8_dc_add_altivec;
+        c->idct8_add    = h264_idct8_add_altivec;
+        c->idct8_add4   = h264_idct8_add4_altivec;
+        c->v_loop_filter_luma = h264_v_loop_filter_luma_altivec;
+        c->h_loop_filter_luma = h264_h_loop_filter_luma_altivec;
 
-        c->weight_h264_pixels_tab[0]   = weight_h264_pixels16_altivec;
-        c->weight_h264_pixels_tab[1]   = weight_h264_pixels8_altivec;
-        c->biweight_h264_pixels_tab[0] = biweight_h264_pixels16_altivec;
-        c->biweight_h264_pixels_tab[1] = biweight_h264_pixels8_altivec;
+        c->weight_pixels_tab[0]   = weight_h264_pixels16_altivec;
+        c->weight_pixels_tab[1]   = weight_h264_pixels8_altivec;
+        c->biweight_pixels_tab[0] = biweight_h264_pixels16_altivec;
+        c->biweight_pixels_tab[1] = biweight_h264_pixels8_altivec;
     }
 #endif /* HAVE_ALTIVEC */
 }
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 06cb3c59de..5efec9eb5e 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -113,106 +113,106 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
         if (bit_depth == 8) {
             if (zvl128b) {
                 if (flags & AV_CPU_FLAG_RVB)
-                    dsp->weight_h264_pixels_tab[0] =
+                    dsp->weight_pixels_tab[0] =
                         ff_h264_weight_funcs_8_rvv[0].weight;
-                dsp->biweight_h264_pixels_tab[0] =
+                dsp->biweight_pixels_tab[0] =
                     ff_h264_weight_funcs_8_rvv[0].biweight;
             }
             if (flags & AV_CPU_FLAG_RVV_I64) {
-                dsp->weight_h264_pixels_tab[1] =
+                dsp->weight_pixels_tab[1] =
                     ff_h264_weight_funcs_8_rvv[1].weight;
-                dsp->biweight_h264_pixels_tab[1] =
+                dsp->biweight_pixels_tab[1] =
                     ff_h264_weight_funcs_8_rvv[1].biweight;
             }
-            dsp->weight_h264_pixels_tab[2] =
+            dsp->weight_pixels_tab[2] =
                  ff_h264_weight_funcs_8_rvv[2].weight;
-            dsp->biweight_h264_pixels_tab[2] =
+            dsp->biweight_pixels_tab[2] =
                  ff_h264_weight_funcs_8_rvv[2].biweight;
-            dsp->weight_h264_pixels_tab[3] =
+            dsp->weight_pixels_tab[3] =
                  ff_h264_weight_funcs_8_rvv[3].weight;
-            dsp->biweight_h264_pixels_tab[3] =
+            dsp->biweight_pixels_tab[3] =
                  ff_h264_weight_funcs_8_rvv[3].biweight;
         }
 
         if (bit_depth == 8 && zvl128b) {
-            dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
-            dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
-            dsp->h264_h_loop_filter_luma_mbaff =
+            dsp->v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
+            dsp->h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
+            dsp->h_loop_filter_luma_mbaff =
                 ff_h264_h_loop_filter_luma_mbaff_8_rvv;
-            dsp->h264_v_loop_filter_luma_intra =
+            dsp->v_loop_filter_luma_intra =
                 ff_h264_v_loop_filter_luma_intra_8_rvv;
-            dsp->h264_h_loop_filter_luma_intra =
+            dsp->h_loop_filter_luma_intra =
                 ff_h264_h_loop_filter_luma_intra_8_rvv;
-            dsp->h264_h_loop_filter_luma_mbaff_intra =
+            dsp->h_loop_filter_luma_mbaff_intra =
                 ff_h264_h_loop_filter_luma_mbaff_intra_8_rvv;
-            dsp->h264_v_loop_filter_chroma =
+            dsp->v_loop_filter_chroma =
                 ff_h264_v_loop_filter_chroma_8_rvv;
-            dsp->h264_v_loop_filter_chroma_intra =
+            dsp->v_loop_filter_chroma_intra =
                 ff_h264_v_loop_filter_chroma_intra_8_rvv;
 
             if (chroma_format_idc <= 1) {
-                dsp->h264_h_loop_filter_chroma =
+                dsp->h_loop_filter_chroma =
                     ff_h264_h_loop_filter_chroma_8_rvv;
-                dsp->h264_h_loop_filter_chroma_mbaff =
+                dsp->h_loop_filter_chroma_mbaff =
                     ff_h264_h_loop_filter_chroma_mbaff_8_rvv;
-                dsp->h264_h_loop_filter_chroma_intra =
+                dsp->h_loop_filter_chroma_intra =
                     ff_h264_h_loop_filter_chroma_intra_8_rvv;
-                dsp->h264_h_loop_filter_chroma_mbaff_intra =
+                dsp->h_loop_filter_chroma_mbaff_intra =
                     ff_h264_h_loop_filter_chroma_mbaff_intra_8_rvv;
             }
 
-            dsp->h264_idct_add  = ff_h264_idct_add_8_rvv;
-            dsp->h264_idct8_add = ff_h264_idct8_add_8_rvv;
+            dsp->idct_add  = ff_h264_idct_add_8_rvv;
+            dsp->idct8_add = ff_h264_idct8_add_8_rvv;
             if (flags & AV_CPU_FLAG_RVB) {
-                dsp->h264_idct_dc_add     = ff_h264_idct4_dc_add_8_rvv;
-                dsp->h264_idct_add16      = ff_h264_idct_add16_8_rvv;
-                dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
+                dsp->idct_dc_add     = ff_h264_idct4_dc_add_8_rvv;
+                dsp->idct_add16      = ff_h264_idct_add16_8_rvv;
+                dsp->idct_add16intra = ff_h264_idct_add16intra_8_rvv;
 #  if __riscv_xlen == 64
-                dsp->h264_idct8_add4      = ff_h264_idct8_add4_8_rvv;
+                dsp->idct8_add4      = ff_h264_idct8_add4_8_rvv;
                 if (chroma_format_idc <= 1)
-                    dsp->h264_idct_add8   = ff_h264_idct4_add8_8_rvv;
+                    dsp->idct_add8   = ff_h264_idct4_add8_8_rvv;
                 else
-                    dsp->h264_idct_add8   = ff_h264_idct4_add8_422_8_rvv;
+                    dsp->idct_add8   = ff_h264_idct4_add8_422_8_rvv;
 #  endif
             }
 
-            dsp->h264_luma_dc_dequant_idct =
+            dsp->luma_dc_dequant_idct =
                 ff_h264_luma_dc_dequant_idct_8_rvv;
 
             if (flags & AV_CPU_FLAG_RVV_I64) {
-                dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+                dsp->add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
                 if (flags & AV_CPU_FLAG_RVB)
-                    dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_rvv;
+                    dsp->idct8_dc_add = ff_h264_idct8_dc_add_8_rvv;
             }
-            dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
+            dsp->add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
         }
 
 #define IDCT_DEPTH(depth) \
         if (bit_depth == depth) { \
             if (zvl128b) { \
-                dsp->h264_idct_add = ff_h264_idct_add_##depth##_rvv; \
-                dsp->h264_luma_dc_dequant_idct = \
+                dsp->idct_add = ff_h264_idct_add_##depth##_rvv; \
+                dsp->luma_dc_dequant_idct = \
                     ff_h264_luma_dc_dequant_idct_9_rvv; \
             } \
             if (flags & AV_CPU_FLAG_RVB) \
-                dsp->h264_idct8_add = ff_h264_idct8_add_##depth##_rvv; \
+                dsp->idct8_add = ff_h264_idct8_add_##depth##_rvv; \
             if (zvl128b && (flags & AV_CPU_FLAG_RVB)) { \
-                dsp->h264_idct_dc_add  = ff_h264_idct4_dc_add_##depth##_rvv; \
-                dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \
-                dsp->h264_idct_add16 = ff_h264_idct_add16_##depth##_rvv; \
-                dsp->h264_idct_add16intra = \
+                dsp->idct_dc_add  = ff_h264_idct4_dc_add_##depth##_rvv; \
+                dsp->idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \
+                dsp->idct_add16 = ff_h264_idct_add16_##depth##_rvv; \
+                dsp->idct_add16intra = \
                     ff_h264_idct_add16intra_##depth##_rvv; \
                 if (__riscv_xlen == 64) { \
                     if (chroma_format_idc <= 1) \
-                        dsp->h264_idct_add8 = \
+                        dsp->idct_add8 = \
                             ff_h264_idct4_add8_##depth##_rvv; \
                     else \
-                        dsp->h264_idct_add8 = \
+                        dsp->idct_add8 = \
                             ff_h264_idct4_add8_422_##depth##_rvv; \
                 } \
             } \
             if (__riscv_xlen == 64 && (flags & AV_CPU_FLAG_RVB)) \
-                dsp->h264_idct8_add4 = ff_h264_idct8_add4_##depth##_rvv; \
+                dsp->idct8_add4 = ff_h264_idct8_add4_##depth##_rvv; \
         }
 
         IDCT_DEPTH(9)
@@ -221,9 +221,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
         IDCT_DEPTH(14)
 
         if (bit_depth > 8 && zvl128b) {
-            dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
+            dsp->add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
             if (flags & AV_CPU_FLAG_RVV_I64)
-                dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+                dsp->add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
         }
 
         dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index dfcfce77d3..296e81f322 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -696,10 +696,10 @@ static void hl_decode_mb(SVQ3Context *s)
 
     if (s->cbp & 0x30) {
         uint8_t *dest[2] = { dest_cb, dest_cr };
-        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
-                                               s->dequant4_coeff[4][0]);
-        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
-                                               s->dequant4_coeff[4][0]);
+        s->h264dsp.chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
+                                          s->dequant4_coeff[4][0]);
+        s->h264dsp.chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
+                                          s->dequant4_coeff[4][0]);
         for (j = 1; j < 3; j++) {
             for (i = j * 16; i < j * 16 + 4; i++)
                 if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 1ee1ee4367..add1344022 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -188,163 +188,163 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1)
-        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
+        c->loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
 
     if (bit_depth == 8) {
         if (EXTERNAL_MMX(cpu_flags)) {
             if (chroma_format_idc <= 1) {
             } else {
-                c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
+                c->idct_add8 = ff_h264_idct_add8_422_8_mmx;
             }
         }
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
 
-            c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext;
+            c->weight_pixels_tab[2] = ff_h264_weight_4_mmxext;
 
-            c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
+            c->biweight_pixels_tab[2] = ff_h264_biweight_4_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
-            c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;
+            c->idct8_add  = ff_h264_idct8_add_8_sse2;
 
-            c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
+            c->idct_add16 = ff_h264_idct_add16_8_sse2;
+            c->idct8_add4 = ff_h264_idct8_add4_8_sse2;
             if (chroma_format_idc <= 1)
-                c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
-            c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
-            c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
+                c->idct_add8 = ff_h264_idct_add8_8_sse2;
+            c->idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
+            c->luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
 
-            c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
-            c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
+            c->weight_pixels_tab[0] = ff_h264_weight_16_sse2;
+            c->weight_pixels_tab[1] = ff_h264_weight_8_sse2;
 
-            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
-            c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
+            c->biweight_pixels_tab[0] = ff_h264_biweight_16_sse2;
+            c->biweight_pixels_tab[1] = ff_h264_biweight_8_sse2;
 
-            c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
-            c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
+            c->v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
+            c->h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
+            c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
+            c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
 
 #if ARCH_X86_64
-            c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
+            c->h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
 #endif
 
-            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_sse2;
-            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
+            c->v_loop_filter_chroma       = ff_deblock_v_chroma_8_sse2;
+            c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
             if (chroma_format_idc <= 1) {
-                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_sse2;
-                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
+                c->h_loop_filter_chroma       = ff_deblock_h_chroma_8_sse2;
+                c->h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
             } else {
-                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma422_8_sse2;
-                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
+                c->h_loop_filter_chroma       = ff_deblock_h_chroma422_8_sse2;
+                c->h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
             }
 
-            c->h264_idct_add        = ff_h264_idct_add_8_sse2;
-            c->h264_idct_dc_add     = ff_h264_idct_dc_add_8_sse2;
+            c->idct_add        = ff_h264_idct_add_8_sse2;
+            c->idct_dc_add     = ff_h264_idct_dc_add_8_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags)) {
-            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
-            c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
+            c->biweight_pixels_tab[0] = ff_h264_biweight_16_ssse3;
+            c->biweight_pixels_tab[1] = ff_h264_biweight_8_ssse3;
         }
         if (EXTERNAL_AVX(cpu_flags)) {
-            c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
-            c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+            c->v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
+            c->h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
+            c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
+            c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
 #if ARCH_X86_64
-            c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
+            c->h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
 #endif
 
-            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_avx;
-            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
+            c->v_loop_filter_chroma       = ff_deblock_v_chroma_8_avx;
+            c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_avx;
             if (chroma_format_idc <= 1) {
-                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_avx;
-                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
+                c->h_loop_filter_chroma       = ff_deblock_h_chroma_8_avx;
+                c->h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_avx;
             } else {
-                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma422_8_avx;
-                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
+                c->h_loop_filter_chroma       = ff_deblock_h_chroma422_8_avx;
+                c->h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_avx;
             }
 
-            c->h264_idct_add        = ff_h264_idct_add_8_avx;
-            c->h264_idct_dc_add     = ff_h264_idct_dc_add_8_avx;
+            c->idct_add        = ff_h264_idct_add_8_avx;
+            c->idct_dc_add     = ff_h264_idct_dc_add_8_avx;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-            c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
+            c->idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
-            c->h264_idct_add     = ff_h264_idct_add_10_sse2;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
+            c->idct_add     = ff_h264_idct_add_10_sse2;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
 
-            c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
+            c->idct_add16 = ff_h264_idct_add16_10_sse2;
             if (chroma_format_idc <= 1) {
-                c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
+                c->idct_add8 = ff_h264_idct_add8_10_sse2;
             } else {
-                c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2;
+                c->idct_add8 = ff_h264_idct_add8_422_10_sse2;
             }
-            c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
+            c->idct_add16intra = ff_h264_idct_add16intra_10_sse2;
 #if HAVE_ALIGNED_STACK
-            c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
+            c->idct8_add  = ff_h264_idct8_add_10_sse2;
+            c->idct8_add4 = ff_h264_idct8_add4_10_sse2;
 #endif /* HAVE_ALIGNED_STACK */
 
-            c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
-            c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
-            c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
+            c->weight_pixels_tab[0] = ff_h264_weight_16_10_sse2;
+            c->weight_pixels_tab[1] = ff_h264_weight_8_10_sse2;
+            c->weight_pixels_tab[2] = ff_h264_weight_4_10_sse2;
 
-            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
-            c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
-            c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
+            c->biweight_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
+            c->biweight_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
+            c->biweight_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
 
-            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
-            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
+            c->v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
+            c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
             if (chroma_format_idc <= 1) {
-                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
+                c->h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
             } else {
-                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
+                c->h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
             }
-            c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
-            c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
+            c->v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
+            c->h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
+            c->v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
+            c->h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
         }
         if (EXTERNAL_SSE4(cpu_flags)) {
-            c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
-            c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
-            c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
+            c->weight_pixels_tab[0] = ff_h264_weight_16_10_sse4;
+            c->weight_pixels_tab[1] = ff_h264_weight_8_10_sse4;
+            c->weight_pixels_tab[2] = ff_h264_weight_4_10_sse4;
 
-            c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
-            c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
-            c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
+            c->biweight_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
+            c->biweight_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
+            c->biweight_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
         }
         if (EXTERNAL_AVX(cpu_flags)) {
-            c->h264_idct_dc_add  =
-            c->h264_idct_add     = ff_h264_idct_add_10_avx;
-            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
+            c->idct_dc_add  =
+            c->idct_add     = ff_h264_idct_add_10_avx;
+            c->idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
 
-            c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
+            c->idct_add16 = ff_h264_idct_add16_10_avx;
             if (chroma_format_idc <= 1) {
-                c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
+                c->idct_add8 = ff_h264_idct_add8_10_avx;
             } else {
-                c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx;
+                c->idct_add8 = ff_h264_idct_add8_422_10_avx;
             }
-            c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
+            c->idct_add16intra = ff_h264_idct_add16intra_10_avx;
 #if HAVE_ALIGNED_STACK
-            c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
+            c->idct8_add  = ff_h264_idct8_add_10_avx;
+            c->idct8_add4 = ff_h264_idct8_add4_10_avx;
 #endif /* HAVE_ALIGNED_STACK */
 
-            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
-            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
+            c->v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
+            c->v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
             if (chroma_format_idc <= 1) {
-                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
+                c->h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
             } else {
-                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
+                c->h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
             }
-            c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
-            c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
-            c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
-            c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
+            c->v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
+            c->h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
+            c->v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
+            c->h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
         }
     }
 }
diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 0bf01e072e..5a5a553901 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -194,8 +194,8 @@ static void check_idct(void)
             for (sz = 4; sz <= 8; sz += 4) {
                 void (*idct)(uint8_t *, int16_t *, int) = NULL;
                 const char fmts[3][28] = {
-                    "h264_idct%d_add_%dbpp", "h264_idct%d_dc_add_%dbpp",
-                    "h264_add_pixels%d_%dbpp",
+                    "idct%d_add_%dbpp", "idct%d_dc_add_%dbpp",
+                    "add_pixels%d_%dbpp",
                 };
 
                 randomize_buffers(i);
@@ -206,12 +206,12 @@ static void check_idct(void)
                     dct8x8(coef, bit_depth);
 
                 switch ((sz << 2) | dc) {
-                case (4 << 2) | 0: idct = h.h264_idct_add; break;
-                case (4 << 2) | 1: idct = h.h264_idct_dc_add; break;
-                case (4 << 2) | 2: idct = h.h264_add_pixels4_clear; break;
-                case (8 << 2) | 0: idct = h.h264_idct8_add; break;
-                case (8 << 2) | 1: idct = h.h264_idct8_dc_add; break;
-                case (8 << 2) | 2: idct = h.h264_add_pixels8_clear; break;
+                case (4 << 2) | 0: idct = h.idct_add; break;
+                case (4 << 2) | 1: idct = h.idct_dc_add; break;
+                case (4 << 2) | 2: idct = h.add_pixels4_clear; break;
+                case (8 << 2) | 0: idct = h.idct8_add; break;
+                case (8 << 2) | 1: idct = h.idct8_dc_add; break;
+                case (8 << 2) | 2: idct = h.add_pixels8_clear; break;
                 }
 
                 if (check_func(idct, fmts[dc], sz, bit_depth)) {
@@ -261,17 +261,17 @@ static void check_idct_multiple(void)
             int block_offset[16] = { 0 };
             switch (func) {
             case 0:
-                idct = h.h264_idct_add16;
-                name = "h264_idct_add16";
+                idct = h.idct_add16;
+                name = "idct_add16";
                 break;
             case 1:
-                idct = h.h264_idct_add16intra;
-                name = "h264_idct_add16intra";
+                idct = h.idct_add16intra;
+                name = "idct_add16intra";
                 intra = 1;
                 break;
             case 2:
-                idct = h.h264_idct8_add4;
-                name = "h264_idct8_add4";
+                idct = h.idct8_add4;
+                name = "idct8_add4";
                 sz = 8;
                 break;
             }
@@ -361,7 +361,7 @@ static void check_idct_dequant(void)
         memset(dst_ref, 0, 16 * 16 * SIZEOF_COEF);
         memset(dst_new, 0, 16 * 16 * SIZEOF_COEF);
 
-        if (check_func(h.h264_luma_dc_dequant_idct, "h264_luma_dc_dequant_idct_%d", bit_depth)) {
+        if (check_func(h.luma_dc_dequant_idct, "luma_dc_dequant_idct_%d", bit_depth)) {
 
             call_ref(dst_ref, src, qmul);
             call_new(dst_new, src, qmul);
@@ -425,16 +425,16 @@ static void check_loop_filter(void)
             }                                                           \
         } while (0)
 
-        CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,);
-        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,);
+        CHECK_LOOP_FILTER(v_loop_filter_luma, 1,);
+        CHECK_LOOP_FILTER(h_loop_filter_luma, 0,);
+        CHECK_LOOP_FILTER(h_loop_filter_luma_mbaff, 0,);
+        CHECK_LOOP_FILTER(v_loop_filter_chroma, 1,);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma, 0,);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff, 0,);
 
         ff_h264dsp_init(&h, bit_depth, 2);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma, 0, 422);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff, 0, 422);
 #undef CHECK_LOOP_FILTER
     }
 }
@@ -486,16 +486,16 @@ static void check_loop_filter_intra(void)
             }                                                           \
         } while (0)
 
-        CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,);
-        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,);
+        CHECK_LOOP_FILTER(v_loop_filter_luma_intra, 1,);
+        CHECK_LOOP_FILTER(h_loop_filter_luma_intra, 0,);
+        CHECK_LOOP_FILTER(h_loop_filter_luma_mbaff_intra, 0,);
+        CHECK_LOOP_FILTER(v_loop_filter_chroma_intra, 1,);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_intra, 0,);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff_intra, 0,);
 
         ff_h264dsp_init(&h, bit_depth, 2);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422);
-        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_intra, 0, 422);
+        CHECK_LOOP_FILTER(h_loop_filter_chroma_mbaff_intra, 0, 422);
 #undef CHECK_LOOP_FILTER
     }
 }
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] avcodec/x86/h264_deblock: Various improvements (PR #21535)

Reply via email to