PR #21127 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21127.patch


>From 79925ef71461facbfdbaf0444b980b668f500aa3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Mon, 8 Dec 2025 06:14:24 +0100
Subject: [PATCH 1/2] avcodec/ppc/vc1dsp_altivec: Don't read too much data

vc1_inv_trans_8x4_altivec() is supposed to process a block
of 8x4 words, yet it read and processed eight rows instead of
four. This led to ASAN failures (see [1]) that this commit
intends to fix. It should also improve performance, but I don't
have real hardware to benchmark it.

[1]: https://fate.ffmpeg.org/report.cgi?time=20251207214004&slot=ppc64-linux-gcc-14.3-asan

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/ppc/vc1dsp_altivec.c | 66 +++++++++++++++++----------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index bbadb2aaee..dd0473664e 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -235,7 +235,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, 
ptrdiff_t stride,
 {
     vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
     vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
-    vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+    vector signed int s8, s9, sA, sB;
     vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
     const vector signed int vec_64 = vec_sl(vec_splat_s32(4), 
vec_splat_u32(4));
     const vector unsigned int vec_7 = vec_splat_u32(7);
@@ -253,40 +253,42 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, 
ptrdiff_t stride,
     src1 = vec_ld( 16, block);
     src2 = vec_ld( 32, block);
     src3 = vec_ld( 48, block);
-    src4 = vec_ld( 64, block);
-    src5 = vec_ld( 80, block);
-    src6 = vec_ld( 96, block);
-    src7 = vec_ld(112, block);
 
-    TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
-    s0 = vec_unpackl(src0);
-    s1 = vec_unpackl(src1);
-    s2 = vec_unpackl(src2);
-    s3 = vec_unpackl(src3);
-    s4 = vec_unpackl(src4);
-    s5 = vec_unpackl(src5);
-    s6 = vec_unpackl(src6);
-    s7 = vec_unpackl(src7);
-    s8 = vec_unpackh(src0);
-    s9 = vec_unpackh(src1);
-    sA = vec_unpackh(src2);
-    sB = vec_unpackh(src3);
-    sC = vec_unpackh(src4);
-    sD = vec_unpackh(src5);
-    sE = vec_unpackh(src6);
-    sF = vec_unpackh(src7);
+// Transpose 8x4 matrix of 16-bit elements (in-place)
+    vec_s16 A1, B1, C1, D1;
+    vec_s16 A2, B2, C2, D2;
+
+    A1 = vec_mergeh(src0, src2);
+    B1 = vec_mergel(src0, src2);
+    C1 = vec_mergeh(src1, src3);
+    D1 = vec_mergel(src1, src3);
+
+    A2 = vec_mergeh(A1, C1);
+    B2 = vec_mergel(A1, C1);
+    C2 = vec_mergeh(B1, D1);
+    D2 = vec_mergel(B1, D1);
+
+    s0 = vec_unpackh(A2);
+    s1 = vec_unpackl(A2);
+    s2 = vec_unpackh(B2);
+    s3 = vec_unpackl(B2);
+    s4 = vec_unpackh(C2);
+    s5 = vec_unpackl(C2);
+    s6 = vec_unpackh(D2);
+    s7 = vec_unpackl(D2);
+
     STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
     SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
-    STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
-    SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
-    src0 = vec_pack(s8, s0);
-    src1 = vec_pack(s9, s1);
-    src2 = vec_pack(sA, s2);
-    src3 = vec_pack(sB, s3);
-    src4 = vec_pack(sC, s4);
-    src5 = vec_pack(sD, s5);
-    src6 = vec_pack(sE, s6);
-    src7 = vec_pack(sF, s7);
+
+    src0 = vec_pack(s0, s0);
+    src1 = vec_pack(s1, s1);
+    src2 = vec_pack(s2, s2);
+    src3 = vec_pack(s3, s3);
+    src4 = vec_pack(s4, s4);
+    src5 = vec_pack(s5, s5);
+    src6 = vec_pack(s6, s6);
+    src7 = vec_pack(s7, s7);
+
     TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
 
     s0 = vec_unpackh(src0);
-- 
2.49.1


>From 0d1fe859603bce2c837b863d12d3a120e906098c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Mon, 8 Dec 2025 06:36:28 +0100
Subject: [PATCH 2/2] avcodec/ppc/vc1dsp_altivec, h264chroma_template: Disable
 unused variables

Move the variables only used by big-endian code inside the #if
HAVE_BIGENDIAN blocks.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/ppc/h264chroma_template.c | 10 ++++------
 libavcodec/ppc/vc1dsp_altivec.c      |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/libavcodec/ppc/h264chroma_template.c 
b/libavcodec/ppc/h264chroma_template.c
index c64856bb14..9455a55dd1 100644
--- a/libavcodec/ppc/h264chroma_template.c
+++ b/libavcodec/ppc/h264chroma_template.c
@@ -129,7 +129,6 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, 
const uint8_t * src,
     const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
     const vec_u16 v6us = vec_splat_u16(6);
 
-    vec_u8 vsrcperm0, vsrcperm1;
     vec_u8 vsrc0uc, vsrc1uc;
     vec_s16 vsrc0ssH, vsrc1ssH;
     vec_u8 vsrc2uc, vsrc3uc;
@@ -138,8 +137,8 @@ static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, 
const uint8_t * src,
 #if HAVE_BIGENDIAN
     register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
-    vsrcperm0 = vec_lvsl(0, src);
-    vsrcperm1 = vec_lvsl(1, src);
+    vec_u8 vsrcperm0 = vec_lvsl(0, src);
+    vec_u8 vsrcperm1 = vec_lvsl(1, src);
 #endif
 
     if (((unsigned long)dst) % 16 == 0) {
@@ -204,7 +203,6 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t 
*dst, const uint8_t *sr
     const vec_s16 v28ss = 
vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
     const vec_u16 v6us  = vec_splat_u16(6);
 
-    vec_u8 vsrcperm0, vsrcperm1;
     vec_u8 vsrc0uc, vsrc1uc;
     vec_s16 vsrc0ssH, vsrc1ssH;
     vec_u8 vsrc2uc, vsrc3uc;
@@ -213,8 +211,8 @@ static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t 
*dst, const uint8_t *sr
 #if HAVE_BIGENDIAN
     register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
-    vsrcperm0 = vec_lvsl(0, src);
-    vsrcperm1 = vec_lvsl(1, src);
+    vec_u8 vsrcperm0 = vec_lvsl(0, src);
+    vec_u8 vsrcperm1 = vec_lvsl(1, src);
 #endif
 
     if (((unsigned long)dst) % 16 == 0) {
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index dd0473664e..31e9b0010d 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -247,7 +247,6 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, 
ptrdiff_t stride,
     const vector unsigned int vec_1 = vec_splat_u32(1);
     vector unsigned char tmp;
     vector signed short tmp2, tmp3;
-    vector unsigned char perm0, perm1, p0, p1, p;
 
     src0 = vec_ld(  0, block);
     src1 = vec_ld( 16, block);
@@ -309,6 +308,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, 
ptrdiff_t stride,
     src3 = vec_pack(s3, sB);
 
 #if HAVE_BIGENDIAN
+    vector unsigned char perm0, perm1, p0, p1, p;
     p0 = vec_lvsl (0, dest);
     p1 = vec_lvsl (stride, dest);
     p = vec_splat_u8 (-1);
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to