PR #21127 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127.patch
>From 79925ef71461facbfdbaf0444b980b668f500aa3 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Mon, 8 Dec 2025 06:14:24 +0100 Subject: [PATCH 1/2] avcodec/ppc/vc1dsp_altivec: Don't read too much data vc1_inv_trans_8x4_altivec() is supposed to process a block of 8x4 words, yet it read and processed eight lines. This led to ASAN failures (see [1]) that this commit intends to fix. It should also lead to performance improvements, but I don't have real hardware to bench it. [1]: https://fate.ffmpeg.org/report.cgi?time=20251207214004&slot=ppc64-linux-gcc-14.3-asan Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/ppc/vc1dsp_altivec.c | 66 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c index bbadb2aaee..dd0473664e 100644 --- a/libavcodec/ppc/vc1dsp_altivec.c +++ b/libavcodec/ppc/vc1dsp_altivec.c @@ -235,7 +235,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride, { vector signed short src0, src1, src2, src3, src4, src5, src6, src7; vector signed int s0, s1, s2, s3, s4, s5, s6, s7; - vector signed int s8, s9, sA, sB, sC, sD, sE, sF; + vector signed int s8, s9, sA, sB; vector signed int t0, t1, t2, t3, t4, t5, t6, t7; const vector signed int vec_64 = vec_sl(vec_splat_s32(4), vec_splat_u32(4)); const vector unsigned int vec_7 = vec_splat_u32(7); @@ -253,40 +253,42 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride, src1 = vec_ld( 16, block); src2 = vec_ld( 32, block); src3 = vec_ld( 48, block); - src4 = vec_ld( 64, block); - src5 = vec_ld( 80, block); - src6 = vec_ld( 96, block); - src7 = vec_ld(112, block); - TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); - s0 = vec_unpackl(src0); - s1 = vec_unpackl(src1); - s2 = vec_unpackl(src2); - s3 = vec_unpackl(src3); - s4 = vec_unpackl(src4); - s5 = vec_unpackl(src5); - s6 = vec_unpackl(src6); - s7 = 
vec_unpackl(src7); - s8 = vec_unpackh(src0); - s9 = vec_unpackh(src1); - sA = vec_unpackh(src2); - sB = vec_unpackh(src3); - sC = vec_unpackh(src4); - sD = vec_unpackh(src5); - sE = vec_unpackh(src6); - sF = vec_unpackh(src7); +// Transpose 8x4 matrix of 16-bit elements (in-place) + vec_s16 A1, B1, C1, D1; + vec_s16 A2, B2, C2, D2; + + A1 = vec_mergeh(src0, src2); + B1 = vec_mergel(src0, src2); + C1 = vec_mergeh(src1, src3); + D1 = vec_mergel(src1, src3); + + A2 = vec_mergeh(A1, C1); + B2 = vec_mergel(A1, C1); + C2 = vec_mergeh(B1, D1); + D2 = vec_mergel(B1, D1); + + s0 = vec_unpackh(A2); + s1 = vec_unpackl(A2); + s2 = vec_unpackh(B2); + s3 = vec_unpackl(B2); + s4 = vec_unpackh(C2); + s5 = vec_unpackl(C2); + s6 = vec_unpackh(D2); + s7 = vec_unpackl(D2); + STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s); SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7); - STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s); - SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF); - src0 = vec_pack(s8, s0); - src1 = vec_pack(s9, s1); - src2 = vec_pack(sA, s2); - src3 = vec_pack(sB, s3); - src4 = vec_pack(sC, s4); - src5 = vec_pack(sD, s5); - src6 = vec_pack(sE, s6); - src7 = vec_pack(sF, s7); + + src0 = vec_pack(s0, s0); + src1 = vec_pack(s1, s1); + src2 = vec_pack(s2, s2); + src3 = vec_pack(s3, s3); + src4 = vec_pack(s4, s4); + src5 = vec_pack(s5, s5); + src6 = vec_pack(s6, s6); + src7 = vec_pack(s7, s7); + TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); s0 = vec_unpackh(src0); -- 2.49.1 >From 0d1fe859603bce2c837b863d12d3a120e906098c Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Mon, 8 Dec 2025 06:36:28 +0100 Subject: [PATCH 2/2] avcodec/ppc/vc1dsp_altivec, h264chroma_template: Disable unused variables Move the variables only used by big-endian code inside the #if HAVE_BIGENDIAN blocks. 
Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/ppc/h264chroma_template.c | 10 ++++------ libavcodec/ppc/vc1dsp_altivec.c | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/libavcodec/ppc/h264chroma_template.c b/libavcodec/ppc/h264chroma_template.c index c64856bb14..9455a55dd1 100644 --- a/libavcodec/ppc/h264chroma_template.c +++ b/libavcodec/ppc/h264chroma_template.c @@ -129,7 +129,6 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src, const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); const vec_u16 v6us = vec_splat_u16(6); - vec_u8 vsrcperm0, vsrcperm1; vec_u8 vsrc0uc, vsrc1uc; vec_s16 vsrc0ssH, vsrc1ssH; vec_u8 vsrc2uc, vsrc3uc; @@ -138,8 +137,8 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, const uint8_t * src, #if HAVE_BIGENDIAN register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; - vsrcperm0 = vec_lvsl(0, src); - vsrcperm1 = vec_lvsl(1, src); + vec_u8 vsrcperm0 = vec_lvsl(0, src); + vec_u8 vsrcperm1 = vec_lvsl(1, src); #endif if (((unsigned long)dst) % 16 == 0) { @@ -204,7 +203,6 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *sr const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4)); const vec_u16 v6us = vec_splat_u16(6); - vec_u8 vsrcperm0, vsrcperm1; vec_u8 vsrc0uc, vsrc1uc; vec_s16 vsrc0ssH, vsrc1ssH; vec_u8 vsrc2uc, vsrc3uc; @@ -213,8 +211,8 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t *dst, const uint8_t *sr #if HAVE_BIGENDIAN register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 
1 : 0; - vsrcperm0 = vec_lvsl(0, src); - vsrcperm1 = vec_lvsl(1, src); + vec_u8 vsrcperm0 = vec_lvsl(0, src); + vec_u8 vsrcperm1 = vec_lvsl(1, src); #endif if (((unsigned long)dst) % 16 == 0) { diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c index dd0473664e..31e9b0010d 100644 --- a/libavcodec/ppc/vc1dsp_altivec.c +++ b/libavcodec/ppc/vc1dsp_altivec.c @@ -247,7 +247,6 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride, const vector unsigned int vec_1 = vec_splat_u32(1); vector unsigned char tmp; vector signed short tmp2, tmp3; - vector unsigned char perm0, perm1, p0, p1, p; src0 = vec_ld( 0, block); src1 = vec_ld( 16, block); @@ -309,6 +308,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride, src3 = vec_pack(s3, sB); #if HAVE_BIGENDIAN + vector unsigned char perm0, perm1, p0, p1, p; p0 = vec_lvsl (0, dest); p1 = vec_lvsl (stride, dest); p = vec_splat_u8 (-1); -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
