This is an automated email from the git hooks/post-receive script.
Git pushed a commit to branch master
in repository ffmpeg.
The following commit(s) were added to refs/heads/master by this push:
new a72e01b4ec avcodec/ppc/vc1dsp_altivec: Don't read too much data
a72e01b4ec is described below
commit a72e01b4ec762fe00eac82648ba8d850fdbba64d
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Mon Dec 8 06:14:24 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Fri Dec 12 09:44:01 2025 +0100
avcodec/ppc/vc1dsp_altivec: Don't read too much data
vc1_inv_trans_8x4_altivec() is supposed to process a block
of 8x4 words, yet it read and processed eight lines. This led
to ASAN failures (see [1]) that this commit intends to fix.
It should also improve performance, but I don't have
real hardware to benchmark it on.
[1]: https://fate.ffmpeg.org/report.cgi?time=20251207214004&slot=ppc64-linux-gcc-14.3-asan
Reviewed-by: Sean McGovern <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
---
libavcodec/ppc/vc1dsp_altivec.c | 66 +++++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 32 deletions(-)
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index bbadb2aaee..dd0473664e 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -235,7 +235,7 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
{
vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
- vector signed int s8, s9, sA, sB, sC, sD, sE, sF;
+ vector signed int s8, s9, sA, sB;
vector signed int t0, t1, t2, t3, t4, t5, t6, t7;
const vector signed int vec_64 = vec_sl(vec_splat_s32(4),
vec_splat_u32(4));
const vector unsigned int vec_7 = vec_splat_u32(7);
@@ -253,40 +253,42 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, ptrdiff_t stride,
src1 = vec_ld( 16, block);
src2 = vec_ld( 32, block);
src3 = vec_ld( 48, block);
- src4 = vec_ld( 64, block);
- src5 = vec_ld( 80, block);
- src6 = vec_ld( 96, block);
- src7 = vec_ld(112, block);
- TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
- s0 = vec_unpackl(src0);
- s1 = vec_unpackl(src1);
- s2 = vec_unpackl(src2);
- s3 = vec_unpackl(src3);
- s4 = vec_unpackl(src4);
- s5 = vec_unpackl(src5);
- s6 = vec_unpackl(src6);
- s7 = vec_unpackl(src7);
- s8 = vec_unpackh(src0);
- s9 = vec_unpackh(src1);
- sA = vec_unpackh(src2);
- sB = vec_unpackh(src3);
- sC = vec_unpackh(src4);
- sD = vec_unpackh(src5);
- sE = vec_unpackh(src6);
- sF = vec_unpackh(src7);
+// Transpose 8x4 matrix of 16-bit elements (in-place)
+ vec_s16 A1, B1, C1, D1;
+ vec_s16 A2, B2, C2, D2;
+
+ A1 = vec_mergeh(src0, src2);
+ B1 = vec_mergel(src0, src2);
+ C1 = vec_mergeh(src1, src3);
+ D1 = vec_mergel(src1, src3);
+
+ A2 = vec_mergeh(A1, C1);
+ B2 = vec_mergel(A1, C1);
+ C2 = vec_mergeh(B1, D1);
+ D2 = vec_mergel(B1, D1);
+
+ s0 = vec_unpackh(A2);
+ s1 = vec_unpackl(A2);
+ s2 = vec_unpackh(B2);
+ s3 = vec_unpackl(B2);
+ s4 = vec_unpackh(C2);
+ s5 = vec_unpackl(C2);
+ s6 = vec_unpackh(D2);
+ s7 = vec_unpackl(D2);
+
STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_4s);
SHIFT_HOR8(s0, s1, s2, s3, s4, s5, s6, s7);
- STEP8(s8, s9, sA, sB, sC, sD, sE, sF, vec_4s);
- SHIFT_HOR8(s8, s9, sA, sB, sC, sD, sE, sF);
- src0 = vec_pack(s8, s0);
- src1 = vec_pack(s9, s1);
- src2 = vec_pack(sA, s2);
- src3 = vec_pack(sB, s3);
- src4 = vec_pack(sC, s4);
- src5 = vec_pack(sD, s5);
- src6 = vec_pack(sE, s6);
- src7 = vec_pack(sF, s7);
+
+ src0 = vec_pack(s0, s0);
+ src1 = vec_pack(s1, s1);
+ src2 = vec_pack(s2, s2);
+ src3 = vec_pack(s3, s3);
+ src4 = vec_pack(s4, s4);
+ src5 = vec_pack(s5, s5);
+ src6 = vec_pack(s6, s6);
+ src7 = vec_pack(s7, s7);
+
TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
s0 = vec_unpackh(src0);
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]