This is an automated email from the git hooks/post-receive script. A commit has been pushed to branch master in repository ffmpeg.
commit dbdf514c17a081328fa9f035cf03cb9b74c85567 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Wed Jan 7 12:46:22 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Sun Jan 25 22:53:21 2026 +0100 avcodec/x86/h264_deblock_10bit: Remove custom stack allocation code Allocate it via cglobal as usual. This makes the SSE2/AVX functions available when HAVE_ALIGNED_STACK is false; it also avoids modifying rsp unnecessarily in the deblock_h_luma_intra_10 functions on Win64. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/h264_deblock_10bit.asm | 23 +++++------------------ libavcodec/x86/h264dsp_init.c | 4 ---- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index 033f2f4d55..1ea5ce4b28 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -153,14 +153,12 @@ cextern pw_1023 ; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) - %assign pad 5*mmsize+12-(stack_offset&15) +cglobal deblock_v_luma_10, 5,5,8,-5*mmsize %define tcm [rsp] %define ms1 [rsp+mmsize] %define ms2 [rsp+mmsize*2] %define am [rsp+mmsize*3] %define bm [rsp+mmsize*4] - SUB rsp, pad shl r2d, 2 shl r3d, 2 LOAD_AB m4, m5, r2d, r3d @@ -205,11 +203,9 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) add r4, mmsize/8 dec r3 jg .loop - ADD rsp, pad RET -cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) - %assign pad 7*mmsize+12-(stack_offset&15) +cglobal deblock_h_luma_10, 5,6,8,-7*mmsize %define tcm [rsp] %define ms1 [rsp+mmsize] %define ms2 [rsp+mmsize*2] @@ -217,7 +213,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %define p2m [rsp+mmsize*4] %define am [rsp+mmsize*5] %define bm [rsp+mmsize*6] - SUB rsp, pad shl r2d, 2 shl r3d, 2 LOAD_AB m4, m5, r2d, r3d @@ -295,7 
+290,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) lea r2, [r2+r1*(mmsize/2)] dec r5 jg .loop - ADD rsp, pad RET %endmacro @@ -482,7 +476,6 @@ DEBLOCK_LUMA_64 %endmacro %macro LUMA_INTRA_INIT 1 - %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15) %define t0 m4 %define t1 m5 %define t2 m6 @@ -492,7 +485,6 @@ DEBLOCK_LUMA_64 CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] %assign i i+1 %endrep - SUB rsp, pad %endmacro ; in: %1-%3=tmp, %4=p2, %5=q2 @@ -654,7 +646,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10, 4,7,16 +cglobal deblock_h_luma_intra_10, 4,7,16,mmsize %define t0 m15 %define t1 m14 %define t2 m2 @@ -667,8 +659,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16 %define p2 m13 %define p3 m4 %define spill [rsp] - %assign pad 24-(stack_offset&15) - SUB rsp, pad lea r4, [r1*4] lea r5, [r1*3] ; 3*stride add r4, r0 ; pix+4*stride @@ -709,7 +699,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16 lea r4, [r4+r1*8] dec r6 jg .loop - ADD rsp, pad RET %endmacro @@ -727,7 +716,7 @@ DEBLOCK_LUMA_INTRA_64 ; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) +cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize LUMA_INTRA_INIT 3 lea r4, [r1*4] lea r5, [r1*3] @@ -749,14 +738,13 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) add r4, mmsize dec r6 jg .loop - ADD rsp, pad RET ;----------------------------------------------------------------------------- ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) +cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize LUMA_INTRA_INIT 
8 %if mmsize == 8 lea r4, [r1*3] @@ -793,7 +781,6 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) dec r6 %endif jg .loop - ADD rsp, pad RET %endmacro diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 66c2f36908..a62de09577 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -314,12 +314,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, } else { c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2; } -#if HAVE_ALIGNED_STACK c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; -#endif /* HAVE_ALIGNED_STACK */ } if (EXTERNAL_SSE4(cpu_flags)) { c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; @@ -354,12 +352,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, } else { c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx; } -#if HAVE_ALIGNED_STACK c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; -#endif /* HAVE_ALIGNED_STACK */ } } } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
