The branch, master has been updated
via d355749ca671e4c33bdca773c32c627eefb28623 (commit)
via f4d9fb0bd0a284853da04cb8bd538bb9a77e0db0 (commit)
via 23efbb5e2e26b39a0532a4773b2cb7cafe0e0b73 (commit)
from 7770c0bf0d958851b4aaf42de9246a7cd874e15c (commit)
- Log -----------------------------------------------------------------
commit d355749ca671e4c33bdca773c32c627eefb28623
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Thu Oct 30 09:58:13 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Sun Nov 2 09:46:15 2025 +0100
avcodec/x86/hevc/add_res: Avoid unnecessary modification
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/hevc/add_res.asm b/libavcodec/x86/hevc/add_res.asm
index 8abfcab893..3489e04e2b 100644
--- a/libavcodec/x86/hevc/add_res.asm
+++ b/libavcodec/x86/hevc/add_res.asm
@@ -27,9 +27,9 @@ cextern pw_1023
%define max_pixels_10 pw_1023
; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project
-%macro ADD_RES_MMX_4_8 0
- mova m0, [r1]
- mova m2, [r1+8]
+%macro ADD_RES_MMX_4_8 1
+ mova m0, [r1+%1]
+ mova m2, [r1+%1+8]
movd m1, [r0]
movd m3, [r0+r2]
@@ -50,27 +50,26 @@ INIT_MMX mmxext
; void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
cglobal hevc_add_residual_4_8, 3, 3, 6
pxor m4, m4
- ADD_RES_MMX_4_8
- add r1, 16
+ ADD_RES_MMX_4_8 0
lea r0, [r0+r2*2]
- ADD_RES_MMX_4_8
+ ADD_RES_MMX_4_8 16
RET
-%macro ADD_RES_SSE_8_8 0
+%macro ADD_RES_SSE_8_8 1
movq m0, [r0]
movq m1, [r0+r2]
punpcklbw m0, m4
punpcklbw m1, m4
- paddsw m0, [r1]
- paddsw m1, [r1+16]
+ paddsw m0, [r1+%1]
+ paddsw m1, [r1+%1+16]
packuswb m0, m1
movq m2, [r0+r2*2]
movq m3, [r0+r3]
punpcklbw m2, m4
punpcklbw m3, m4
- paddsw m2, [r1+32]
- paddsw m3, [r1+48]
+ paddsw m2, [r1+%1+32]
+ paddsw m3, [r1+%1+48]
packuswb m2, m3
movq [r0], m0
@@ -124,10 +123,9 @@ INIT_XMM sse2
cglobal hevc_add_residual_8_8, 3, 4, 5
pxor m4, m4
lea r3, [r2*3]
- ADD_RES_SSE_8_8
- add r1, 64
+ ADD_RES_SSE_8_8 0
lea r0, [r0+r2*4]
- ADD_RES_SSE_8_8
+ ADD_RES_SSE_8_8 64
RET
; void ff_hevc_add_residual_16_8_<opt>(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
@@ -292,9 +290,8 @@ cglobal hevc_add_residual_4_10, 3, 3, 6
pxor m2, m2
mova m3, [max_pixels_10]
ADD_RES_MMX_4_10 r0, r2, r1
- add r1, 16
lea r0, [r0+2*r2]
- ADD_RES_MMX_4_10 r0, r2, r1
+ ADD_RES_MMX_4_10 r0, r2, r1+16
RET
INIT_XMM sse2
@@ -305,8 +302,7 @@ cglobal hevc_add_residual_8_10, 3, 4, 6
ADD_RES_SSE_8_10 r0, r2, r3, r1
lea r0, [r0+r2*4]
- add r1, 64
- ADD_RES_SSE_8_10 r0, r2, r3, r1
+ ADD_RES_SSE_8_10 r0, r2, r3, r1+64
RET
cglobal hevc_add_residual_16_10, 3, 5, 6
commit f4d9fb0bd0a284853da04cb8bd538bb9a77e0db0
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Thu Oct 30 08:49:38 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Sun Nov 2 09:46:15 2025 +0100
avcodec/x86/hevc/add_res: Reduce number of registers used
This makes these functions use only volatile registers (even on Win64).
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/hevc/add_res.asm b/libavcodec/x86/hevc/add_res.asm
index 5d7115620f..8abfcab893 100644
--- a/libavcodec/x86/hevc/add_res.asm
+++ b/libavcodec/x86/hevc/add_res.asm
@@ -61,20 +61,16 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
movq m1, [r0+r2]
punpcklbw m0, m4
punpcklbw m1, m4
- mova m2, [r1]
- mova m3, [r1+16]
- paddsw m0, m2
- paddsw m1, m3
+ paddsw m0, [r1]
+ paddsw m1, [r1+16]
packuswb m0, m1
movq m2, [r0+r2*2]
movq m3, [r0+r3]
punpcklbw m2, m4
punpcklbw m3, m4
- mova m6, [r1+32]
- mova m7, [r1+48]
- paddsw m2, m6
- paddsw m3, m7
+ paddsw m2, [r1+32]
+ paddsw m3, [r1+48]
packuswb m2, m3
movq [r0], m0
@@ -88,27 +84,33 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
mova m2, m1
punpcklbw m1, m0
punpckhbw m2, m0
+%if cpuflag(avx2)
mova xm5, [r1+%1]
mova xm6, [r1+%1+16]
-%if cpuflag(avx2)
vinserti128 m5, m5, [r1+%1+32], 1
vinserti128 m6, m6, [r1+%1+48], 1
-%endif
paddsw m1, m5
paddsw m2, m6
+%else
+ paddsw m1, [r1+%1]
+ paddsw m2, [r1+%1+16]
+%endif
mova m3, [%3]
mova m4, m3
punpcklbw m3, m0
punpckhbw m4, m0
+%if cpuflag(avx2)
mova xm5, [r1+%1+mmsize*2]
mova xm6, [r1+%1+mmsize*2+16]
-%if cpuflag(avx2)
vinserti128 m5, m5, [r1+%1+96], 1
vinserti128 m6, m6, [r1+%1+112], 1
-%endif
paddsw m3, m5
paddsw m4, m6
+%else
+ paddsw m3, [r1+%1+mmsize*2]
+ paddsw m4, [r1+%1+mmsize*2+16]
+%endif
packuswb m1, m2
packuswb m3, m4
@@ -119,7 +121,7 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
INIT_XMM sse2
; void ff_hevc_add_residual_8_8_<opt>(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
-cglobal hevc_add_residual_8_8, 3, 4, 8
+cglobal hevc_add_residual_8_8, 3, 4, 5
pxor m4, m4
lea r3, [r2*3]
ADD_RES_SSE_8_8
@@ -129,7 +131,7 @@ cglobal hevc_add_residual_8_8, 3, 4, 8
RET
; void ff_hevc_add_residual_16_8_<opt>(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
-cglobal hevc_add_residual_16_8, 3, 5, 7
+cglobal hevc_add_residual_16_8, 3, 5, 5
pxor m0, m0
lea r3, [r2*3]
mov r4d, 4
@@ -143,7 +145,7 @@ cglobal hevc_add_residual_16_8, 3, 5, 7
RET
; void ff_hevc_add_residual_32_8_<opt>(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
-cglobal hevc_add_residual_32_8, 3, 5, 7
+cglobal hevc_add_residual_32_8, 3, 5, 5
pxor m0, m0
mov r4d, 16
.loop:
commit 23efbb5e2e26b39a0532a4773b2cb7cafe0e0b73
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Thu Oct 30 08:30:40 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Sun Nov 2 09:46:15 2025 +0100
avcodec/x86/hevc/add_res: Remove AVX add_residual functions
The AVX and SSE2 functions are identical except for the VEX encodings
used since e9abef437f0a348c017d4ac8b23a122881c1dc87 and
8b8492452d53293b2ac8c842877fadf7925fc950.
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/hevc/add_res.asm b/libavcodec/x86/hevc/add_res.asm
index 3ecbd4269c..5d7115620f 100644
--- a/libavcodec/x86/hevc/add_res.asm
+++ b/libavcodec/x86/hevc/add_res.asm
@@ -117,7 +117,7 @@ cglobal hevc_add_residual_4_8, 3, 3, 6
%endmacro
-%macro TRANSFORM_ADD_8 0
+INIT_XMM sse2
; void ff_hevc_add_residual_8_8_<opt>(uint8_t *dst, const int16_t *res, ptrdiff_t stride)
cglobal hevc_add_residual_8_8, 3, 4, 8
pxor m4, m4
@@ -154,12 +154,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7
dec r4d
jg .loop
RET
-%endmacro
-INIT_XMM sse2
-TRANSFORM_ADD_8
-INIT_XMM avx
-TRANSFORM_ADD_8
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
diff --git a/libavcodec/x86/hevc/dsp.h b/libavcodec/x86/hevc/dsp.h
index 03986b970a..0062699ce0 100644
--- a/libavcodec/x86/hevc/dsp.h
+++ b/libavcodec/x86/hevc/dsp.h
@@ -172,10 +172,6 @@ void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t s
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
-void ff_hevc_add_residual_8_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
-void ff_hevc_add_residual_16_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
-void ff_hevc_add_residual_32_8_avx(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
-
void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index 6966340c42..f1558b7e3e 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -877,10 +877,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct[0] = ff_hevc_idct_4x4_8_avx;
c->idct[1] = ff_hevc_idct_8x8_8_avx;
-
- c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
- c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
- c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
}
if (EXTERNAL_AVX2(cpu_flags)) {
c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
-----------------------------------------------------------------------
Summary of changes:
libavcodec/x86/hevc/add_res.asm | 63 ++++++++++++++++++-----------------------
libavcodec/x86/hevc/dsp.h | 4 ---
libavcodec/x86/hevc/dsp_init.c | 4 ---
3 files changed, 28 insertions(+), 43 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]