This is an automated email from the git hooks/post-receive script. A commit has been pushed to branch master in repository ffmpeg.
commit dbdf514c17a081328fa9f035cf03cb9b74c85567 Author: Andreas Rheinhardt <[email protected]> AuthorDate: Wed Jan 7 12:46:22 2026 +0100 Commit: Andreas Rheinhardt <[email protected]> CommitDate: Sun Jan 25 22:53:21 2026 +0100 avcodec/x86/h264_deblock_10bit: Remove custom stack allocation code Allocate it via cglobal as usual. This makes the SSE2/AVX functions available when HAVE_ALIGNED_STACK is false; it also avoids modifying rsp unnecessarily in the deblock_h_luma_intra_10 functions on Win64. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/h264_deblock_10bit.asm | 23 +++++------------------ libavcodec/x86/h264dsp_init.c | 4 ---- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index 033f2f4d55..1ea5ce4b28 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -153,14 +153,12 @@ cextern pw_1023 ; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) - %assign pad 5*mmsize+12-(stack_offset&15) +cglobal deblock_v_luma_10, 5,5,8,-5*mmsize %define tcm [rsp] %define ms1 [rsp+mmsize] %define ms2 [rsp+mmsize*2] %define am [rsp+mmsize*3] %define bm [rsp+mmsize*4] - SUB rsp, pad shl r2d, 2 shl r3d, 2 LOAD_AB m4, m5, r2d, r3d @@ -205,11 +203,9 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) add r4, mmsize/8 dec r3 jg .loop - ADD rsp, pad RET -cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) - %assign pad 7*mmsize+12-(stack_offset&15) +cglobal deblock_h_luma_10, 5,6,8,-7*mmsize %define tcm [rsp] %define ms1 [rsp+mmsize] %define ms2 [rsp+mmsize*2] @@ -217,7 +213,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %define p2m [rsp+mmsize*4] %define am [rsp+mmsize*5] %define bm [rsp+mmsize*6] - SUB rsp, pad shl r2d, 2 shl r3d, 2 LOAD_AB m4, m5, r2d, r3d @@ -295,7 
+290,6 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) lea r2, [r2+r1*(mmsize/2)] dec r5 jg .loop - ADD rsp, pad RET %endmacro @@ -482,7 +476,6 @@ DEBLOCK_LUMA_64 %endmacro %macro LUMA_INTRA_INIT 1 - %xdefine pad %1*mmsize+((gprsize*3) % mmsize)-(stack_offset&15) %define t0 m4 %define t1 m5 %define t2 m6 @@ -492,7 +485,6 @@ DEBLOCK_LUMA_64 CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] %assign i i+1 %endrep - SUB rsp, pad %endmacro ; in: %1-%3=tmp, %4=p2, %5=q2 @@ -654,7 +646,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10, 4,7,16 +cglobal deblock_h_luma_intra_10, 4,7,16,mmsize %define t0 m15 %define t1 m14 %define t2 m2 @@ -667,8 +659,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16 %define p2 m13 %define p3 m4 %define spill [rsp] - %assign pad 24-(stack_offset&15) - SUB rsp, pad lea r4, [r1*4] lea r5, [r1*3] ; 3*stride add r4, r0 ; pix+4*stride @@ -709,7 +699,6 @@ cglobal deblock_h_luma_intra_10, 4,7,16 lea r4, [r4+r1*8] dec r6 jg .loop - ADD rsp, pad RET %endmacro @@ -727,7 +716,7 @@ DEBLOCK_LUMA_INTRA_64 ; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) +cglobal deblock_v_luma_intra_10, 4,7,8,-3*mmsize LUMA_INTRA_INIT 3 lea r4, [r1*4] lea r5, [r1*3] @@ -749,14 +738,13 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) add r4, mmsize dec r6 jg .loop - ADD rsp, pad RET ;----------------------------------------------------------------------------- ; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) +cglobal deblock_h_luma_intra_10, 4,7,8,-8*mmsize LUMA_INTRA_INIT 
8 %if mmsize == 8 lea r4, [r1*3] @@ -793,7 +781,6 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) dec r6 %endif jg .loop - ADD rsp, pad RET %endmacro diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 66c2f36908..a62de09577 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -314,12 +314,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, } else { c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2; } -#if HAVE_ALIGNED_STACK c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; -#endif /* HAVE_ALIGNED_STACK */ } if (EXTERNAL_SSE4(cpu_flags)) { c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; @@ -354,12 +352,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, } else { c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx; } -#if HAVE_ALIGNED_STACK c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; -#endif /* HAVE_ALIGNED_STACK */ } } } _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
