PR #22466 opened by R-Camacho
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22466
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22466.patch

SpacemiT K1
scene_sad8_c:                                       330212.5 ( 1.00x)
scene_sad8_rvv_i32:                                  24649.3 (13.40x)
scene_sad10_c:                                      318776.9 ( 1.00x)
scene_sad10_rvv_i32:                                 30582.9 (10.42x)
scene_sad12_c:                                      317931.7 ( 1.00x)
scene_sad12_rvv_i32:                                 30567.6 (10.40x)
scene_sad14_c:                                      317800.6 ( 1.00x)
scene_sad14_rvv_i32:                                 30573.9 (10.39x)
scene_sad15_c:                                      317870.8 ( 1.00x)
scene_sad15_rvv_i32:                                 30570.3 (10.40x)
scene_sad16_c:                                      317885.1 ( 1.00x)
scene_sad16_rvv_i32:                                 30571.2 (10.40x)

Signed-off-by: Rodrigo Camacho <[email protected]>


From 9222d824093baa5086683616cb9ddb296b171162 Mon Sep 17 00:00:00 2001
From: Rodrigo Camacho <[email protected]>
Date: Thu, 12 Feb 2026 17:45:04 +0000
Subject: [PATCH] avfilter/scene_sad: R-V V scene_sad

SpacemiT K1
scene_sad8_c:                                       330212.5 ( 1.00x)
scene_sad8_rvv_i32:                                  24649.3 (13.40x)
scene_sad10_c:                                      318776.9 ( 1.00x)
scene_sad10_rvv_i32:                                 30582.9 (10.42x)
scene_sad12_c:                                      317931.7 ( 1.00x)
scene_sad12_rvv_i32:                                 30567.6 (10.40x)
scene_sad14_c:                                      317800.6 ( 1.00x)
scene_sad14_rvv_i32:                                 30573.9 (10.39x)
scene_sad15_c:                                      317870.8 ( 1.00x)
scene_sad15_rvv_i32:                                 30570.3 (10.40x)
scene_sad16_c:                                      317885.1 ( 1.00x)
scene_sad16_rvv_i32:                                 30571.2 (10.40x)

Signed-off-by: Rodrigo Camacho <[email protected]>
---
 libavfilter/riscv/Makefile         |   8 +-
 libavfilter/riscv/scene_sad_init.c |  40 ++++++++++
 libavfilter/riscv/scene_sad_rvv.S  | 121 +++++++++++++++++++++++++++++
 libavfilter/scene_sad.c            |   3 +
 libavfilter/scene_sad.h            |   2 +
 5 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/riscv/scene_sad_init.c
 create mode 100644 libavfilter/riscv/scene_sad_rvv.S

diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
index 32b07eec1a..369e297d84 100644
--- a/libavfilter/riscv/Makefile
+++ b/libavfilter/riscv/Makefile
@@ -1,6 +1,10 @@
 OBJS-$(CONFIG_AFIR_FILTER)                   += riscv/af_afir_init.o
 RVV-OBJS-$(CONFIG_AFIR_FILTER)               += riscv/af_afir_rvv.o
-OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_init.o
-RVV-OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_rvv.o
+
+OBJS-$(CONFIG_BLACKDETECT_FILTER)                       += riscv/vf_blackdetect_init.o
+RVV-OBJS-$(CONFIG_BLACKDETECT_FILTER)           += riscv/vf_blackdetect_rvv.o
+
+OBJS-$(CONFIG_SCENE_SAD)                     += riscv/scene_sad_init.o
+RVV-OBJS-$(CONFIG_SCENE_SAD)                 += riscv/scene_sad_rvv.o
 
 SHLIBOBJS += riscv/cpu_common.o
diff --git a/libavfilter/riscv/scene_sad_init.c b/libavfilter/riscv/scene_sad_init.c
new file mode 100644
index 0000000000..7392195a8a
--- /dev/null
+++ b/libavfilter/riscv/scene_sad_init.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2026 Rodrigo Camacho.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavfilter/scene_sad.h"
+
+void ff_scene_sad8_rvv(SCENE_SAD_PARAMS);
+
+void ff_scene_sad16_rvv(SCENE_SAD_PARAMS);
+
+ff_scene_sad_fn ff_scene_sad_get_fn_riscv(int depth)
+{
+    /* NOTE(review): the RVV symbols are referenced unconditionally; confirm
+     * this file still links when the RVV objects are not built. */
+    const int have_rvv = av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32;
+    /* No vector kernel for this CPU, or for samples wider than 16 bits. */
+    if (!have_rvv || depth > 16)
+        return NULL;
+
+    /* Up to 8 bits per sample: byte kernel; 9..16 bits: 16-bit kernel. */
+    return depth <= 8 ? ff_scene_sad8_rvv : ff_scene_sad16_rvv;
+}
+
diff --git a/libavfilter/riscv/scene_sad_rvv.S b/libavfilter/riscv/scene_sad_rvv.S
new file mode 100644
index 0000000000..aa2a08fb46
--- /dev/null
+++ b/libavfilter/riscv/scene_sad_rvv.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2026 Rodrigo Camacho.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+/* Sums |src1 - src2| over width x height bytes and stores the total to *sum.
+ * void ff_scene_sad8_rvv(const uint8_t *src1, ptrdiff_t stride1,
+ *                        const uint8_t *src2, ptrdiff_t stride2,
+ *                        ptrdiff_t width, ptrdiff_t height,
+ *                        uint64_t *sum)
+ */
+func ff_scene_sad8_rvv, zve32x
+        lpad      0
+        mv        a7, zero                  // a7 = running total over all rows
+
+        vsetivli  zero, 1, e32, m1, ta, ma
+        vmv.s.x   v24, zero                 // v24[0] = 32-bit accumulator for the current row
+
+        sub       a1, a1, a4                // a1 = stride1 - width: gap from row end to next row
+        sub       a3, a3, a4                // a3 = stride2 - width
+1:                                          // row loop
+        mv        t0, a4                    // t0 = bytes still to process in this row
+        addi      a5, a5, -1                // consume one row (tested at the bottom of the loop)
+2:                                          // chunk loop within a row
+        vsetvli   t1, t0, e8, m4, ta, ma    // t1 = number of bytes handled in this pass
+        vle8.v    v0, (a0)
+        vle8.v    v4, (a2)
+
+        add       a0, a0, t1                // advance src1 past the loaded bytes
+        vmaxu.vv  v8, v0, v4
+        vminu.vv  v12, v0, v4
+        sub       t0, t0, t1
+        vsub.vv   v8, v8, v12               // |a - b| = max(a, b) - min(a, b), unsigned
+
+        vsetvli   zero, zero, e16, m8, ta, ma // keep vl; treat elements as 16-bit from here
+        vzext.vf2 v16, v8                   // zero-extend the byte differences to 16 bits
+        add       a2, a2, t1                // advance src2 past the loaded bytes
+        vwredsumu.vs v24, v16, v24          // v24[0] += widening (32-bit) sum of the differences
+
+        bnez      t0, 2b
+
+        vsetivli  zero, 1, e32, m1, ta, ma
+        vmv.x.s   t2, v24                   // NOTE(review): sign-extends; assumes row sum < 2^31
+        vmv.s.x   v24, zero                 // clear the accumulator for the next row
+
+        add       a0, a0, a1                // step src1 to the start of the next row
+        add       a7, a7, t2                // fold the row sum into the total
+        add       a2, a2, a3                // step src2 to the start of the next row
+
+        bnez      a5, 1b                    // NOTE(review): assumes height >= 1 (decrement precedes test)
+
+        sd        a7, 0(a6)                 // *sum = total; NOTE(review): sd is RV64-only
+        ret
+endfunc
+
+/* Sums |src1 - src2| over width x height 16-bit samples; total goes to *sum.
+ * void ff_scene_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1,
+ *                         const uint8_t *src2, ptrdiff_t stride2,
+ *                         ptrdiff_t width, ptrdiff_t height,
+ *                         uint64_t *sum)
+ */
+func ff_scene_sad16_rvv, zve32x
+        lpad     0
+        mv       a7, zero                   // a7 = running total over all rows
+        slli     t3, a4, 1                  // t3 = row length in bytes (2 bytes per sample)
+
+        vsetivli zero, 1, e32, m1, ta, ma
+        vmv.s.x  v24, zero                  // v24[0] = 32-bit accumulator for the current row
+
+        sub      a1, a1, t3                 // a1 = stride1 - row bytes: gap to the next row
+        sub      a3, a3, t3                 // a3 = stride2 - row bytes
+1:                                          // row loop
+        mv       t0, a4                     // t0 = samples still to process in this row
+        addi     a5, a5, -1                 // consume one row (tested at the bottom of the loop)
+2:                                          // chunk loop within a row
+        vsetvli  t1, t0, e16, m4, ta, ma    // t1 = number of samples handled in this pass
+        vle16.v  v0, (a0)
+        vle16.v  v4, (a2)
+
+        slli     t2, t1, 1                  // t2 = bytes consumed by this pass
+        sub      t0, t0, t1
+
+        vmaxu.vv v8, v0, v4
+        vminu.vv v12, v0, v4
+        add      a2, a2, t2                 // advance src2 past the loaded samples
+        vsub.vv  v8, v8, v12                // |a - b| = max(a, b) - min(a, b), unsigned
+        add      a0, a0, t2                 // advance src1 past the loaded samples
+        vwredsumu.vs v24, v8, v24           // v24[0] += widening (32-bit) sum of the differences
+
+        bnez     t0, 2b
+
+        vsetivli zero, 1, e32, m1, ta, ma
+        vmv.x.s  t2, v24                    // NOTE(review): sign-extends; row sum >= 2^31 goes negative
+        vmv.s.x  v24, zero                  // clear the accumulator for the next row
+
+        add      a0, a0, a1                 // step src1 to the start of the next row
+        add      a7, a7, t2                 // fold the row sum into the total
+        add      a2, a2, a3                 // step src2 to the start of the next row
+
+        bnez     a5, 1b                     // NOTE(review): assumes height >= 1 (decrement precedes test)
+
+        sd       a7, 0(a6)                  // *sum = total; NOTE(review): sd is RV64-only
+        ret
+endfunc
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
index 56177ced76..083246ad92 100644
--- a/libavfilter/scene_sad.c
+++ b/libavfilter/scene_sad.c
@@ -61,6 +61,9 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
     ff_scene_sad_fn sad = NULL;
 #if ARCH_X86 && HAVE_X86ASM
     sad = ff_scene_sad_get_fn_x86(depth);
+#endif
+#if ARCH_RISCV && HAVE_RVV
+    sad = ff_scene_sad_get_fn_riscv(depth);
 #endif
     if (!sad) {
         if (depth <= 8)
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
index 173a051f2b..cfa3509afc 100644
--- a/libavfilter/scene_sad.h
+++ b/libavfilter/scene_sad.h
@@ -39,6 +39,8 @@ void ff_scene_sad16_c(SCENE_SAD_PARAMS);
 
 ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
 
+ff_scene_sad_fn ff_scene_sad_get_fn_riscv(int depth);
+
 ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
 
 #endif /* AVFILTER_SCENE_SAD_H */
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to