PR #21169 opened by Rémi Denis-Courmont (Courmisch)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21169
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21169.patch


From 9b112740bbf7559757cf1760aca9da597c97153d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]>
Date: Thu, 11 Dec 2025 18:19:42 +0200
Subject: [PATCH 1/2] lavc/llvidencdsp: fix R-V V sub_left_predict

The old code loaded the destination buffer and added it to the result,
so its output was only correct if the destination was already zeroed.
checkasm did not catch this because it zeroes the destination buffer
before calling the function. The fixed code is even faster:

SpacemiT X60:
sub_left_predict_c:                                  51792.5 ( 1.00x)
sub_left_predict_rvv_i32:                             3504.4 (14.78x)
---
 libavcodec/riscv/llvidencdsp_rvv.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavcodec/riscv/llvidencdsp_rvv.S b/libavcodec/riscv/llvidencdsp_rvv.S
index a862b776e0..22eef3d8f5 100644
--- a/libavcodec/riscv/llvidencdsp_rvv.S
+++ b/libavcodec/riscv/llvidencdsp_rvv.S
@@ -48,12 +48,10 @@ func ff_llvidenc_sub_left_predict_rvv, zve32x
         vsetvli t0, t3, e8, m8, ta, ma
         vle8.v  v16, (a1)
         sub     t3, t3, t0
-        vle8.v  v8, (a0)
-        add     a1, a1, t0
         vslide1up.vx    v24, v16, a5
-        vadd.vv v8, v8, v16
+        add     a1, a1, t0
+        vsub.vv v8, v16, v24
         lb      a5, -1(a1)
-        vsub.vv v8, v8, v24
         vse8.v  v8, (a0)
         add     a0, a0, t0
         bnez    t3, 2b
-- 
2.49.1


From 4bbd5cb673c015a66e9ba1abd3aaf92124d1cbd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]>
Date: Thu, 11 Dec 2025 18:24:48 +0200
Subject: [PATCH 2/2] checkasm: test all plane configurations with
 sub_left_predict

The original loop unconditionally broke out after its first iteration,
so only the first plane configuration was ever tested. Iterate over all
plane configurations instead.
---
 tests/checkasm/llviddspenc.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/tests/checkasm/llviddspenc.c b/tests/checkasm/llviddspenc.c
index c2eb63519f..3185520e15 100644
--- a/tests/checkasm/llviddspenc.c
+++ b/tests/checkasm/llviddspenc.c
@@ -75,29 +75,25 @@ static void check_diff_bytes(LLVidEncDSPContext *c)
 
 static void check_sub_left_pred(LLVidEncDSPContext *c)
 {
-    int i;
     LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE * MAX_HEIGHT]);
     LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE * MAX_HEIGHT]);
-    LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT]);
-    LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT]);
+    LOCAL_ALIGNED_32(uint8_t, src, [MAX_STRIDE * MAX_HEIGHT]);
 
     declare_func(void, uint8_t *dst, const uint8_t *src,
                  ptrdiff_t stride, ptrdiff_t width, int height);
 
-    memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
-    memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);
-    randomize_buffers(src0, MAX_STRIDE * MAX_HEIGHT);
-    memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT);
+    randomize_buffers(src, MAX_STRIDE * MAX_HEIGHT);
 
     if (check_func(c->sub_left_predict, "sub_left_predict")) {
-        for (i = 0; i < 5; i ++) {
-            call_ref(dst0, src0, planes[i].s, planes[i].w, planes[i].h);
-            call_new(dst1, src1, planes[i].s, planes[i].w, planes[i].h);
+        for (size_t i = 0; i < FF_ARRAY_ELEMS(planes); i ++) {
+            memset(dst0, 0, MAX_STRIDE * MAX_HEIGHT);
+            memset(dst1, 0, MAX_STRIDE * MAX_HEIGHT);
+            call_ref(dst0, src, planes[i].s, planes[i].w, planes[i].h);
+            call_new(dst1, src, planes[i].s, planes[i].w, planes[i].h);
             if (memcmp(dst0, dst1, planes[i].w * planes[i].h))
                 fail();
-            break;
         }
-        bench_new(dst1, src0, planes[4].s, planes[4].w, planes[4].h);
+        bench_new(dst1, src, planes[4].s, planes[4].w, planes[4].h);
     }
 }
 
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to