The branch release/5.0 has been updated
via fef37af2e2d226e9cab99084c7ddeb89e6e77e35 (commit)
via 8c8cb15db4eaf8533ccb689cf64a7a8cd8ee48ae (commit)
via 7ec86238a3c9dcba60beb2a36b1119bcc7129c95 (commit)
from 3b04d086b417029697fc3a3a3b3f40096b99eb61 (commit)
- Log -----------------------------------------------------------------
commit fef37af2e2d226e9cab99084c7ddeb89e6e77e35
Author: Bin Peng <[email protected]>
AuthorDate: Fri Oct 24 15:58:08 2025 +0800
Commit: Martin Storsjö <[email protected]>
CommitDate: Tue Nov 4 14:11:13 2025 +0200
lavc/aarch64: Fix addp overflow in ff_pred16x16_plane_neon_10
The mismatch between neon and C functions can be reproduced
using the following bitstream and command line on aarch64 devices.
wget https://streams.videolan.org/ffmpeg/incoming/replay_intra_pred_16x16.h264
./ffmpeg -cpuflags 0 -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_ref
./ffmpeg -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_neon
Signed-off-by: Bin Peng <[email protected]>
(cherry picked from commit 3115c0c0e6c27c689a02a7267dcf8e61fa2ac425)
diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S
index d0999938ef..795d2ce540 100644
--- a/libavcodec/aarch64/h264pred_neon.S
+++ b/libavcodec/aarch64/h264pred_neon.S
@@ -489,10 +489,10 @@ function ff_pred16x16_plane_neon_10, export=1
mul v2.8h, v2.8h, v0.8h
mul v3.8h, v3.8h, v0.8h
addp v2.8h, v2.8h, v3.8h
- addp v2.8h, v2.8h, v2.8h
- addp v2.4h, v2.4h, v2.4h
- sshll v3.4s, v2.4h, #2
- saddw v2.4s, v3.4s, v2.4h
+ saddlp v2.4s, v2.8h
+ addp v2.4s, v2.4s, v2.4s
+ shl v3.4s, v2.4s, #2
+ add v2.4s, v3.4s, v2.4s
rshrn v4.4h, v2.4s, #6
trn2 v5.4h, v4.4h, v4.4h
add v2.4h, v4.4h, v5.4h
@@ -506,14 +506,13 @@ function ff_pred16x16_plane_neon_10, export=1
sxtl v6.4s, v5.4h // c
mov v0.h[0], wzr
- mul v0.8h, v0.8h, v4.h[0]
dup v16.4s, v2.s[0]
dup v17.4s, v2.s[0]
dup v2.8h, v4.h[0] // b
dup v3.4s, v6.s[0] // c
sshll v2.4s, v2.4h, #3 // b * 8
- saddw v16.4s, v16.4s, v0.4h
- saddw2 v17.4s, v17.4s, v0.8h
+ smlal v16.4s, v0.4h, v4.h[0]
+ smlal2 v17.4s, v0.8h, v4.h[0]
sub v3.4s, v3.4s, v2.4s
mov w3, #16
commit 8c8cb15db4eaf8533ccb689cf64a7a8cd8ee48ae
Author: Bin Peng <[email protected]>
AuthorDate: Fri Dec 13 22:19:47 2024 +0800
Commit: Martin Storsjö <[email protected]>
CommitDate: Tue Nov 4 14:11:09 2025 +0200
lavc/aarch64: Fix ff_pred16x16_plane_neon_10
Fix test failure on aarch64:
./tests/checkasm/checkasm --test=h264pred 367840
Signed-off-by: Peng Bin <[email protected]>
Signed-off-by: Martin Storsjö <[email protected]>
(cherry picked from commit 72a3656e8468a394373b6397aacc906d7f7794c2)
diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S
index 168f8191ad..d0999938ef 100644
--- a/libavcodec/aarch64/h264pred_neon.S
+++ b/libavcodec/aarch64/h264pred_neon.S
@@ -502,28 +502,27 @@ function ff_pred16x16_plane_neon_10, export=1
add v7.4h, v7.4h, v0.4h
shl v2.4h, v7.4h, #4
ssubl v2.4s, v2.4h, v3.4h
- shl v3.4h, v4.4h, #4
ext v0.16b, v0.16b, v0.16b, #14
- ssubl v6.4s, v5.4h, v3.4h
+ sxtl v6.4s, v5.4h // c
mov v0.h[0], wzr
mul v0.8h, v0.8h, v4.h[0]
dup v16.4s, v2.s[0]
dup v17.4s, v2.s[0]
- dup v2.8h, v4.h[0]
- dup v3.4s, v6.s[0]
- shl v2.8h, v2.8h, #3
+ dup v2.8h, v4.h[0] // b
+ dup v3.4s, v6.s[0] // c
+ sshll v2.4s, v2.4h, #3 // b * 8
saddw v16.4s, v16.4s, v0.4h
saddw2 v17.4s, v17.4s, v0.8h
- saddw v3.4s, v3.4s, v2.4h
+ sub v3.4s, v3.4s, v2.4s
mov w3, #16
mvni v4.8h, #0xFC, lsl #8 // 1023 for clipping
1:
sqshrun v0.4h, v16.4s, #5
sqshrun2 v0.8h, v17.4s, #5
- saddw v16.4s, v16.4s, v2.4h
- saddw v17.4s, v17.4s, v2.4h
+ add v16.4s, v16.4s, v2.4s
+ add v17.4s, v17.4s, v2.4s
sqshrun v1.4h, v16.4s, #5
sqshrun2 v1.8h, v17.4s, #5
add v16.4s, v16.4s, v3.4s
commit 7ec86238a3c9dcba60beb2a36b1119bcc7129c95
Author: Bin Peng <[email protected]>
AuthorDate: Mon Dec 16 10:31:23 2024 +0800
Commit: Martin Storsjö <[email protected]>
CommitDate: Tue Nov 4 14:11:04 2025 +0200
lavc/aarch64: Fix ff_pred8x8_plane_neon_10
Fix test failure on aarch64:
./tests/checkasm/checkasm --test=h264pred 479612
The mismatch between neon and C functions can also be reproduced using the
following bitstream and command line.
wget https://streams.videolan.org/ffmpeg/incoming/intra8x8pred_10bit.264
./ffmpeg -cpuflags 0 -threads 1 -i intra8x8pred_10bit.264 -f framemd5 -y md5_ref
./ffmpeg -threads 1 -i intra8x8pred_10bit.264 -f framemd5 -y md5_neon
Signed-off-by: Bin Peng <[email protected]>
Signed-off-by: Martin Storsjö <[email protected]>
(cherry picked from commit decc9e643cc3ac5537f42b465e2637fbefbf41cc)
diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S
index ea37689f34..168f8191ad 100644
--- a/libavcodec/aarch64/h264pred_neon.S
+++ b/libavcodec/aarch64/h264pred_neon.S
@@ -595,12 +595,11 @@ function ff_pred8x8_plane_neon_10, export=1
ssubl v2.4s, v2.4h, v3.4h
ext v0.16b, v0.16b, v0.16b, #14
mov v0.h[0], wzr
- mul v0.8h, v0.8h, v5.h[0]
dup v1.4s, v2.s[0]
dup v2.4s, v2.s[0]
dup v3.8h, v5.h[1]
- saddw v1.4s, v1.4s, v0.4h
- saddw2 v2.4s, v2.4s, v0.8h
+ smlal v1.4s, v0.4h, v5.h[0]
+ smlal2 v2.4s, v0.8h, v5.h[0]
mov w3, #8
mvni v4.8h, #0xFC, lsl #8 // 1023 for clipping
1:
-----------------------------------------------------------------------
Summary of changes:
libavcodec/aarch64/h264pred_neon.S | 33 +++++++++++++++------------------
1 file changed, 15 insertions(+), 18 deletions(-)
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]