PR #22466 opened by R-Camacho
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22466
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22466.patch
SpacemiT K1
scene_sad8_c:        330212.5 ( 1.00x)
scene_sad8_rvv_i32:   24649.3 (13.40x)
scene_sad10_c:       318776.9 ( 1.00x)
scene_sad10_rvv_i32:  30582.9 (10.42x)
scene_sad12_c:       317931.7 ( 1.00x)
scene_sad12_rvv_i32:  30567.6 (10.40x)
scene_sad14_c:       317800.6 ( 1.00x)
scene_sad14_rvv_i32:  30573.9 (10.39x)
scene_sad15_c:       317870.8 ( 1.00x)
scene_sad15_rvv_i32:  30570.3 (10.40x)
scene_sad16_c:       317885.1 ( 1.00x)
scene_sad16_rvv_i32:  30571.2 (10.40x)

Signed-off-by: Rodrigo Camacho <[email protected]>


From 9222d824093baa5086683616cb9ddb296b171162 Mon Sep 17 00:00:00 2001
From: Rodrigo Camacho <[email protected]>
Date: Thu, 12 Feb 2026 17:45:04 +0000
Subject: [PATCH] avfilter/scene_sad: R-V V scene_sad

SpacemiT K1
scene_sad8_c:        330212.5 ( 1.00x)
scene_sad8_rvv_i32:   24649.3 (13.40x)
scene_sad10_c:       318776.9 ( 1.00x)
scene_sad10_rvv_i32:  30582.9 (10.42x)
scene_sad12_c:       317931.7 ( 1.00x)
scene_sad12_rvv_i32:  30567.6 (10.40x)
scene_sad14_c:       317800.6 ( 1.00x)
scene_sad14_rvv_i32:  30573.9 (10.39x)
scene_sad15_c:       317870.8 ( 1.00x)
scene_sad15_rvv_i32:  30570.3 (10.40x)
scene_sad16_c:       317885.1 ( 1.00x)
scene_sad16_rvv_i32:  30571.2 (10.40x)

Signed-off-by: Rodrigo Camacho <[email protected]>
---
 libavfilter/riscv/Makefile         |   8 +-
 libavfilter/riscv/scene_sad_init.c |  40 ++++++++++
 libavfilter/riscv/scene_sad_rvv.S  | 121 +++++++++++++++++++++++++++++
 libavfilter/scene_sad.c            |   3 +
 libavfilter/scene_sad.h            |   2 +
 5 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/riscv/scene_sad_init.c
 create mode 100644 libavfilter/riscv/scene_sad_rvv.S

diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
index 32b07eec1a..369e297d84 100644
--- a/libavfilter/riscv/Makefile
+++ b/libavfilter/riscv/Makefile
@@ -1,6 +1,10 @@
 OBJS-$(CONFIG_AFIR_FILTER) += riscv/af_afir_init.o
 RVV-OBJS-$(CONFIG_AFIR_FILTER) += riscv/af_afir_rvv.o
-OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_init.o
-RVV-OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_rvv.o
+
+OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_init.o
+RVV-OBJS-$(CONFIG_BLACKDETECT_FILTER) += riscv/vf_blackdetect_rvv.o
+
+OBJS-$(CONFIG_SCENE_SAD) += riscv/scene_sad_init.o
+RVV-OBJS-$(CONFIG_SCENE_SAD) += riscv/scene_sad_rvv.o
 
 SHLIBOBJS += riscv/cpu_common.o
diff --git a/libavfilter/riscv/scene_sad_init.c b/libavfilter/riscv/scene_sad_init.c
new file mode 100644
index 0000000000..7392195a8a
--- /dev/null
+++ b/libavfilter/riscv/scene_sad_init.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2026 Rodrigo Camacho.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavfilter/scene_sad.h"
+
+void ff_scene_sad8_rvv(SCENE_SAD_PARAMS);
+void ff_scene_sad16_rvv(SCENE_SAD_PARAMS);
+
+/** Pick the RVV SAD kernel for a sample bit depth; NULL if none applies. */
+ff_scene_sad_fn ff_scene_sad_get_fn_riscv(int depth)
+{
+#if HAVE_RVV /* the kernels are RVV-OBJS: only reference them when built */
+    if (av_get_cpu_flags() & AV_CPU_FLAG_RVV_I32) {
+        if (depth <= 8)
+            return ff_scene_sad8_rvv;
+        if (depth <= 16)
+            return ff_scene_sad16_rvv;
+    }
+#endif
+    return NULL;
+}
diff --git a/libavfilter/riscv/scene_sad_rvv.S b/libavfilter/riscv/scene_sad_rvv.S
new file mode 100644
index 0000000000..aa2a08fb46
--- /dev/null
+++ b/libavfilter/riscv/scene_sad_rvv.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2026 Rodrigo Camacho.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+/* Sum of absolute differences between two planes of 8-bit samples.
+ * void ff_scene_sad8_rvv(const uint8_t *src1, ptrdiff_t stride1,
+ *                        const uint8_t *src2, ptrdiff_t stride2,
+ *                        ptrdiff_t width, ptrdiff_t height,
+ *                        uint64_t *sum)
+ * Registers: a0=src1 a1=stride1 a2=src2 a3=stride2 a4=width a5=height a6=sum */
+func ff_scene_sad8_rvv, zve32x
+        lpad 0
+        mv a7, zero                          /* a7 = running total over all rows */
+
+        vsetivli zero, 1, e32, m1, ta, ma    /* vl = 1, 32-bit elements */
+        vmv.s.x v24, zero                    /* v24[0] = 0: per-row accumulator */
+
+        sub a1, a1, a4                       /* a1 = stride1 - width: row end -> next row */
+        sub a3, a3, a4                       /* a3 = stride2 - width */
+1:                                           /* row loop, tested at the bottom */
+        mv t0, a4                            /* t0 = samples left in this row */
+        addi a5, a5, -1                      /* one row fewer; NOTE(review): assumes height >= 1 */
+2:                                           /* strip loop over one row */
+        vsetvli t1, t0, e8, m4, ta, ma       /* t1 = samples handled this pass */
+        vle8.v v0, (a0)
+        vle8.v v4, (a2)
+
+        add a0, a0, t1                       /* advance src1 by the bytes just read */
+        vmaxu.vv v8, v0, v4
+        vminu.vv v12, v0, v4
+        sub t0, t0, t1
+        vsub.vv v8, v8, v12                  /* |x - y| = max(x, y) - min(x, y) */
+
+        vsetvli zero, zero, e16, m8, ta, ma  /* keep vl, switch to 16-bit elements */
+        vzext.vf2 v16, v8                    /* zero-extend the differences to 16 bits */
+        add a2, a2, t1                       /* advance src2 */
+        vwredsumu.vs v24, v16, v24           /* v24[0] += sum of v16, as 32 bits */
+
+        bnez t0, 2b
+
+        vsetivli zero, 1, e32, m1, ta, ma
+        vmv.x.s t2, v24                      /* t2 = this row's sum */
+        vmv.s.x v24, zero                    /* reset the accumulator for the next row */
+
+        add a0, a0, a1                       /* step src1 to the start of the next row */
+        add a7, a7, t2
+        add a2, a2, a3                       /* step src2 to the start of the next row */
+
+        bnez a5, 1b
+
+        sd a7, 0(a6)                         /* *sum = total; NOTE(review): sd is RV64-only */
+        ret
+endfunc
+
+/* Sum of absolute differences between two planes of 16-bit samples.
+ * void ff_scene_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1,
+ *                         const uint8_t *src2, ptrdiff_t stride2,
+ *                         ptrdiff_t width, ptrdiff_t height,
+ *                         uint64_t *sum)
+ * Registers as for ff_scene_sad8_rvv; a4 is used as a count of 16-bit samples. */
+func ff_scene_sad16_rvv, zve32x
+        lpad 0
+        mv a7, zero                          /* a7 = running total over all rows */
+        slli t3, a4, 1                       /* t3 = bytes per row (2 per sample) */
+
+        vsetivli zero, 1, e32, m1, ta, ma    /* vl = 1, 32-bit elements */
+        vmv.s.x v24, zero                    /* v24[0] = 0: per-row accumulator */
+
+        sub a1, a1, t3                       /* a1 = stride1 - row bytes */
+        sub a3, a3, t3                       /* a3 = stride2 - row bytes */
+1:                                           /* row loop, tested at the bottom */
+        mv t0, a4                            /* t0 = samples left in this row */
+        addi a5, a5, -1                      /* one row fewer; NOTE(review): assumes height >= 1 */
+2:                                           /* strip loop over one row */
+        vsetvli t1, t0, e16, m4, ta, ma      /* t1 = samples handled this pass */
+        vle16.v v0, (a0)
+        vle16.v v4, (a2)
+
+        slli t2, t1, 1                       /* t2 = bytes consumed this pass */
+        sub t0, t0, t1
+
+        vmaxu.vv v8, v0, v4
+        vminu.vv v12, v0, v4
+        add a2, a2, t2                       /* advance src2 */
+        vsub.vv v8, v8, v12                  /* |x - y| = max(x, y) - min(x, y) */
+        add a0, a0, t2                       /* advance src1 */
+        vwredsumu.vs v24, v8, v24            /* v24[0] += sum of v8, as 32 bits */
+
+        bnez t0, 2b
+
+        vsetivli zero, 1, e32, m1, ta, ma
+        vmv.x.s t2, v24                      /* NOTE(review): sign-extends on RV64; row sum >= 2^31 (width > 32768) goes negative */
+        vmv.s.x v24, zero                    /* reset the accumulator for the next row */
+
+        add a0, a0, a1                       /* step src1 to the start of the next row */
+        add a7, a7, t2
+        add a2, a2, a3                       /* step src2 to the start of the next row */
+
+        bnez a5, 1b
+
+        sd a7, 0(a6)                         /* *sum = total; NOTE(review): sd is RV64-only */
+        ret
+endfunc
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
index 56177ced76..083246ad92 100644
--- a/libavfilter/scene_sad.c
+++ b/libavfilter/scene_sad.c
@@ -61,6 +61,9 @@ ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
     ff_scene_sad_fn sad = NULL;
 #if ARCH_X86 && HAVE_X86ASM
     sad = ff_scene_sad_get_fn_x86(depth);
+#endif
+#if ARCH_RISCV && HAVE_RVV
+    sad = ff_scene_sad_get_fn_riscv(depth);
 #endif
     if (!sad) {
         if (depth <= 8)
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
index 173a051f2b..cfa3509afc 100644
--- a/libavfilter/scene_sad.h
+++ b/libavfilter/scene_sad.h
@@ -39,6 +39,8 @@ void ff_scene_sad16_c(SCENE_SAD_PARAMS);
 
 ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
 
+ff_scene_sad_fn ff_scene_sad_get_fn_riscv(int depth);
+
 ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
 
 #endif /* AVFILTER_SCENE_SAD_H */
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]
