PR #23105 opened by mengzhuo URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23105 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23105.patch
RISC-V Vector 1.0 MPEG audio synthesis window (float + fixed). Float (zve64f): LMUL=8 memcpy, batch 16 outputs×2 with unit-stride vle32.v on per-sub-band contiguous window blocks. Zero vlse32.v — all memory access is contiguous. Fixed (zve64x,b): LMUL=8 memcpy, LMUL=4 vwmul.vv dot products. vsetivli throughout. Constants hoisted. Caller-saved regs; v0 free. Single init: F32 + I64 + RVB. Benchmark (X100, 5M calls): RVV 550ns C 589ns speedup 1.07x Tested SpacemiT X100/X60 (VLEN=256). VisionFive 2 (no V, C fallback). Signed-off-by: Meng Zhuo <[email protected]> From e44ee669a3a2281b61729802a5fe8dfcb53b5385 Mon Sep 17 00:00:00 2001 From: Meng Zhuo <[email protected]> Date: Wed, 13 May 2026 18:12:56 +0800 Subject: [PATCH] lavc/mpegaudiodsp: R-V V apply_window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RISC-V Vector 1.0 MPEG audio synthesis window (float + fixed). Float (zve64f): LMUL=8 memcpy, batch 16 outputs×2 with unit-stride vle32.v on per-sub-band contiguous window blocks. Zero vlse32.v — all memory access is contiguous. Fixed (zve64x,b): LMUL=8 memcpy, LMUL=4 vwmul.vv dot products. vsetivli throughout. Constants hoisted. Caller-saved regs; v0 free. Single init: F32 + I64 + RVB. Benchmark (X100, 5M calls): RVV 550ns C 589ns speedup 1.07x Tested SpacemiT X100/X60 (VLEN=256). VisionFive 2 (no V, C fallback). 
Signed-off-by: Meng Zhuo <[email protected]> --- libavcodec/mpegaudiodsp.c | 2 + libavcodec/mpegaudiodsp.h | 1 + libavcodec/riscv/Makefile | 2 + libavcodec/riscv/mpegaudiodsp_init.c | 46 ++++++ libavcodec/riscv/mpegaudiodsp_rvv.S | 207 +++++++++++++++++++++++++++ 5 files changed, 258 insertions(+) create mode 100644 libavcodec/riscv/mpegaudiodsp_init.c create mode 100644 libavcodec/riscv/mpegaudiodsp_rvv.S diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c index 0971c28734..947b0df88a 100644 --- a/libavcodec/mpegaudiodsp.c +++ b/libavcodec/mpegaudiodsp.c @@ -97,6 +97,8 @@ av_cold void ff_mpadsp_init(MPADSPContext *s) ff_mpadsp_init_arm(s); #elif ARCH_PPC ff_mpadsp_init_ppc(s); +#elif ARCH_RISCV + ff_mpadsp_init_riscv(s); #elif ARCH_X86 ff_mpadsp_init_x86(s); #endif diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h index 5e47a263bb..4c22f4a465 100644 --- a/libavcodec/mpegaudiodsp.h +++ b/libavcodec/mpegaudiodsp.h @@ -65,6 +65,7 @@ void ff_mpadsp_init_aarch64(MPADSPContext *s); void ff_mpadsp_init_arm(MPADSPContext *s); void ff_mpadsp_init_ppc(MPADSPContext *s); void ff_mpadsp_init_x86(MPADSPContext *s); +void ff_mpadsp_init_riscv(MPADSPContext *s); void ff_mpadsp_init_x86_tabs(void); void ff_mpadsp_init_mipsfpu(MPADSPContext *s); void ff_mpadsp_init_mipsdsp(MPADSPContext *s); diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile index 2c53334923..5cd63ab3f8 100644 --- a/libavcodec/riscv/Makefile +++ b/libavcodec/riscv/Makefile @@ -52,6 +52,8 @@ OBJS-$(CONFIG_LPC) += riscv/lpc_init.o RVV-OBJS-$(CONFIG_LPC) += riscv/lpc_rvv.o OBJS-$(CONFIG_ME_CMP) += riscv/me_cmp_init.o RVV-OBJS-$(CONFIG_ME_CMP) += riscv/me_cmp_rvv.o +OBJS-$(CONFIG_MPEGAUDIODSP) += riscv/mpegaudiodsp_init.o +RVV-OBJS-$(CONFIG_MPEGAUDIODSP) += riscv/mpegaudiodsp_rvv.o OBJS-$(CONFIG_MPEGVIDEO) += riscv/mpegvideo_init.o RVV-OBJS-$(CONFIG_MPEGVIDEO) += riscv/mpegvideo_rvv.o OBJS-$(CONFIG_MPEGVIDEOENCDSP) += riscv/mpegvideoencdsp_init.o diff --git 
a/libavcodec/riscv/mpegaudiodsp_init.c b/libavcodec/riscv/mpegaudiodsp_init.c
new file mode 100644
index 0000000000..5067be3748
--- /dev/null
+++ b/libavcodec/riscv/mpegaudiodsp_init.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2026 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/mpegaudiodsp.h"
+
+/* Synthesis-window kernels implemented in mpegaudiodsp_rvv.S. */
+void ff_mpadsp_apply_window_float_rvv(float *synth_buf, float *window,
+                                      int *dither_state, float *samples,
+                                      ptrdiff_t incr);
+void ff_mpadsp_apply_window_fixed_rvv(int32_t *synth_buf, int32_t *window,
+                                      int *dither_state, int16_t *samples,
+                                      ptrdiff_t incr);
+
+/**
+ * Install the RISC-V Vector synthesis-window kernels into the DSP context.
+ *
+ * The two kernels are selected independently, each on the CPU flags for
+ * the extensions its own assembly is built with.
+ *
+ * @param s context whose apply_window_float / apply_window_fixed pointers
+ *          are replaced when the matching kernel is usable
+ */
+av_cold void ff_mpadsp_init_riscv(MPADSPContext *s)
+{
+#if HAVE_RVV
+    int flags = av_get_cpu_flags();
+
+    /* Both kernels copy the wrap-around area with 64-bit vector elements. */
+    if (!(flags & AV_CPU_FLAG_RVV_I64))
+        return;
+
+    /* Built for zve64f only: no scalar bit-manipulation is required. */
+    if (flags & AV_CPU_FLAG_RVV_F32)
+        s->apply_window_float = ff_mpadsp_apply_window_float_rvv;
+
+    /* Built for zve64x + b: the clipping uses min/max. */
+    if (flags & AV_CPU_FLAG_RVB)
+        s->apply_window_fixed = ff_mpadsp_apply_window_fixed_rvv;
+#endif
+}
diff --git a/libavcodec/riscv/mpegaudiodsp_rvv.S b/libavcodec/riscv/mpegaudiodsp_rvv.S
new file mode 100644
index 0000000000..5f742bb1a0
--- /dev/null
+++ 
b/libavcodec/riscv/mpegaudiodsp_rvv.S
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2026 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+/*
+ * MPEG audio synthesis window, RISC-V Vector 1.0.
+ *
+ * Arguments: a0 = synth_buf, a1 = window, a2 = dither_state,
+ *            a3 = samples, a4 = incr (in samples).
+ *
+ * Writing W[x] * S[y] for the sum over i = 0..7 of
+ * window[x + 64 * i] * synth_buf[y + 64 * i], the 32 outputs are:
+ *
+ *   out[j]      =  W[j] * S[16 + j] - W[32 + j] * S[48 - j]        (j = 0..15)
+ *   out[16]     = -W[48] * S[32]
+ *   out[16 + m] = -W[16 + m] * S[32 - m] - W[48 + m] * S[32 + m]   (m = 1..15)
+ *
+ * Each half is computed 16 lanes at a time. The mirrored synth_buf operand
+ * is loaded with a unit-stride vle32.v and reversed with vrgather.vv.
+ */
+
+/*
+ * Float version. The dither state is added to out[0] and cleared on exit.
+ */
+func ff_mpadsp_apply_window_float_rvv, zve64f
+        lpad        0
+        /* copy to avoid wrap: synth_buf[512..543] = synth_buf[0..31] */
+        vsetivli    zero, 16, e64, m8, ta, ma
+        li          t1, 512 * 4
+        add         t1, a0, t1
+        vle64.v     v8, (a0)
+        vse64.v     v8, (t1)
+
+        lw          t0, (a2)
+        fcvt.s.w    ft0, t0             /* the state is an int: convert it */
+        slli        t5, a4, 2           /* output stride in bytes */
+
+        /* tail-undisturbed: the scalar moves below only replace lane 0 */
+        vsetivli    zero, 16, e32, m4, tu, ma
+        vid.v       v24
+        vrsub.vi    v24, v24, 15        /* v24[j] = 15 - j */
+
+        /* out[0..15]: v4 = dither + W[j] * S[16 + j], v20 = W[32 + j] * S[48 - j] */
+        vmv.v.i     v4, 0
+        vmv.v.i     v20, 0
+        vfmv.s.f    v4, ft0
+        addi        t0, a0, 16 * 4
+        addi        t1, a0, 33 * 4
+        mv          t2, a1
+        li          t3, 8
+1:
+        addi        t4, t2, 32 * 4
+        vle32.v     v8, (t2)
+        vle32.v     v12, (t0)
+        vfmacc.vv   v4, v8, v12
+        vle32.v     v8, (t4)
+        vle32.v     v12, (t1)
+        vrgather.vv v16, v12, v24       /* lane j = synth_buf[48 - j] */
+        vfmacc.vv   v20, v8, v16
+        addi        t0, t0, 64 * 4
+        addi        t1, t1, 64 * 4
+        addi        t2, t2, 64 * 4
+        addi        t3, t3, -1
+        bnez        t3, 1b
+
+        vfsub.vv    v4, v4, v20
+        vsse32.v    v4, (a3), t5
+
+        /* out[16..31]: v4 = -W[16 + m] * S[32 - m], v20 = -W[48 + m] * S[32 + m] */
+        vmv.v.i     v4, 0
+        vmv.v.i     v20, 0
+        addi        t0, a0, 17 * 4
+        addi        t1, a0, 32 * 4
+        addi        t2, a1, 16 * 4
+        li          t3, 8
+2:
+        addi        t4, t2, 32 * 4
+        vle32.v     v8, (t2)
+        vle32.v     v12, (t0)
+        vrgather.vv v16, v12, v24       /* lane m = synth_buf[32 - m] */
+        vfnmsac.vv  v4, v8, v16
+        vle32.v     v8, (t4)
+        vle32.v     v12, (t1)
+        vfnmsac.vv  v20, v8, v12
+        addi        t0, t0, 64 * 4
+        addi        t1, t1, 64 * 4
+        addi        t2, t2, 64 * 4
+        addi        t3, t3, -1
+        bnez        t3, 2b
+
+        vmv.s.x     v4, zero            /* out[16] has no mirrored term */
+        vfadd.vv    v4, v4, v20
+        slli        t0, t5, 4
+        add         t0, a3, t0          /* &samples[16 * incr] */
+        vsse32.v    v4, (t0), t5
+
+        sw          zero, (a2)
+        ret
+endfunc
+
+/*
+ * Fixed-point version. The 64-bit sums are computed with vwmacc.vv and
+ * spilled to the stack; a scalar pass then rounds them in output order,
+ * carrying the low 24 bits of the running sum from one sample to the next
+ * and finally into *dither_state. RV64 only (uses ld).
+ */
+func ff_mpadsp_apply_window_fixed_rvv, zve64x, b
+        lpad        0
+        addi        sp, sp, -256        /* room for 16 + 16 sums of 64 bits */
+
+        /* copy to avoid wrap: synth_buf[512..543] = synth_buf[0..31] */
+        vsetivli    zero, 16, e64, m8, ta, ma
+        li          t1, 512 * 4
+        add         t1, a0, t1
+        vle64.v     v16, (a0)
+        vse64.v     v16, (t1)
+        vmv.v.i     v16, 0
+        vmv.v.i     v24, 0
+
+        vsetivli    zero, 16, e32, m4, ta, ma
+        vid.v       v12
+        vrsub.vi    v12, v12, 15        /* v12[j] = 15 - j */
+
+        /* v16 = W[j] * S[16 + j], v24 = W[32 + j] * S[48 - j] */
+        addi        t0, a0, 16 * 4
+        addi        t1, a0, 33 * 4
+        mv          t2, a1
+        li          t3, 8
+1:
+        addi        t4, t2, 32 * 4
+        vle32.v     v4, (t2)
+        vle32.v     v8, (t0)
+        vwmacc.vv   v16, v4, v8
+        vle32.v     v4, (t1)
+        vrgather.vv v8, v4, v12         /* lane j = synth_buf[48 - j] */
+        vle32.v     v4, (t4)
+        vwmacc.vv   v24, v4, v8
+        addi        t0, t0, 64 * 4
+        addi        t1, t1, 64 * 4
+        addi        t2, t2, 64 * 4
+        addi        t3, t3, -1
+        bnez        t3, 1b
+
+        vsetivli    zero, 16, e64, m8, ta, ma
+        vsub.vv     v16, v16, v24
+        vse64.v     v16, (sp)           /* lo[j], j = 0..15 */
+        vmv.v.i     v16, 0
+        vmv.v.i     v24, 0
+
+        /* v16 = W[16 + m] * S[32 - m], v24 = W[48 + m] * S[32 + m] */
+        vsetivli    zero, 16, e32, m4, ta, ma
+        addi        t0, a0, 17 * 4
+        addi        t1, a0, 32 * 4
+        addi        t2, a1, 16 * 4
+        li          t3, 8
+2:
+        addi        t4, t2, 32 * 4
+        vle32.v     v4, (t0)
+        vrgather.vv v8, v4, v12         /* lane m = synth_buf[32 - m] */
+        vle32.v     v4, (t2)
+        vwmacc.vv   v16, v4, v8
+        vle32.v     v4, (t4)
+        vle32.v     v8, (t1)
+        vwmacc.vv   v24, v4, v8
+        addi        t0, t0, 64 * 4
+        addi        t1, t1, 64 * 4
+        addi        t2, t2, 64 * 4
+        addi        t3, t3, -1
+        bnez        t3, 2b
+
+        vsetivli    zero, 16, e64, m8, ta, ma
+        vmv.x.s     t2, v24             /* W[48] * S[32], needed for out[16] */
+        vadd.vv     v16, v16, v24
+        addi        t0, sp, 128
+        vse64.v     v16, (t0)           /* hi[m], m = 0..15 (hi[0] is unused) */
+
+        lw          a5, (a2)            /* running sum starts from the dither */
+        slli        t5, a4, 1           /* output stride in bytes */
+        li          t6, 0xffffff        /* fraction kept after the 24-bit shift */
+        li          a6, 32767
+        li          a7, -32768
+        slli        t4, t5, 5
+        sub         t4, t4, t5
+        add         t4, a3, t4          /* &samples[31 * incr] */
+        addi        a0, sp, 8           /* &lo[1] */
+        addi        a1, sp, 248         /* &hi[15] */
+
+        /* out[0] */
+        ld          t0, (sp)
+        add         a5, a5, t0
+        srai        t0, a5, 24
+        and         a5, a5, t6
+        min         t0, t0, a6
+        max         t0, t0, a7
+        sh          t0, (a3)
+        add         a3, a3, t5
+
+        /* out[j] and out[32 - j] for j = 1..15 */
+        li          t3, 15
+3:
+        ld          t0, (a0)
+        ld          t1, (a1)
+        addi        a0, a0, 8
+        addi        a1, a1, -8
+        add         a5, a5, t0
+        srai        t0, a5, 24
+        and         a5, a5, t6
+        min         t0, t0, a6
+        max         t0, t0, a7
+        sh          t0, (a3)
+        add         a3, a3, t5
+        sub         t1, a5, t1          /* mirrored sum also gets the fraction */
+        srai        t1, t1, 24
+        min         t1, t1, a6
+        max         t1, t1, a7
+        sh          t1, (t4)
+        sub         t4, t4, t5
+        addi        t3, t3, -1
+        bnez        t3, 3b
+
+        /* out[16] */
+        sub         a5, a5, t2
+        srai        t0, a5, 24
+        and         a5, a5, t6
+        min         t0, t0, a6
+        max         t0, t0, a7
+        sh          t0, (a3)
+
+        sw          a5, (a2)
+        addi        sp, sp, 256
+        ret
+endfunc
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]
