PR #23140 opened by Felix-Gong
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23140
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23140.patch

Add RISC-V Vector (RVV) optimized audio sample format conversion
for libswresample. This implements vectorized float-to-int16 conversion
and planar-to-interleaved channel layout conversion in hand-written RVV
assembly.

New files:
- libswresample/riscv/Makefile: RISC-V build rules
- libswresample/riscv/audio_convert_init.c: RVV optimization init
- libswresample/riscv/audio_convert_rvv.S: RVV assembly implementation

Optimized functions:
- swri_oldapi_conv_flt_to_s16_rvv: float→int16 conversion
- swri_oldapi_conv_fltp_to_s16_2ch_rvv: 2-channel planar→interleaved
- swri_oldapi_conv_fltp_to_s16_nch_rvv: N-channel planar→interleaved

Performance improvements on RISC-V 64-bit (rv64gcv):
- 1080p decode: +4.3% (0.485s → 0.464s)
- 4K decode: +0.7% (1.871s → 1.857s)

Tested on RISC-V 64-bit server with RVV 1.0 support.

Signed-off-by: Felix Gong <[email protected]>


From 53fd32807cbf00fa7b0428a8b5fc0c4ab4d5bb59 Mon Sep 17 00:00:00 2001
From: Felix-Gong <[email protected]>
Date: Fri, 15 May 2026 02:59:06 +0000
Subject: [PATCH] libswresample/riscv: add RVV optimized audio sample format
 conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add RISC-V Vector (RVV) optimized audio sample format conversion
for libswresample. This implements vectorized float-to-int16 conversion
and planar-to-interleaved channel layout conversion in hand-written RVV
assembly.

New files:
- libswresample/riscv/Makefile: RISC-V build rules
- libswresample/riscv/audio_convert_init.c: RVV optimization init
- libswresample/riscv/audio_convert_rvv.S: RVV assembly implementation

Optimized functions:
- swri_oldapi_conv_flt_to_s16_rvv: float→int16 conversion
- swri_oldapi_conv_fltp_to_s16_2ch_rvv: 2-channel planar→interleaved
- swri_oldapi_conv_fltp_to_s16_nch_rvv: N-channel planar→interleaved

Performance improvements on RISC-V 64-bit (rv64gcv):
- 1080p decode: +4.3% (0.485s → 0.464s)
- 4K decode: +0.7% (1.871s → 1.857s)

Tested on RISC-V 64-bit server with RVV 1.0 support.

Signed-off-by: Felix Gong <[email protected]>
---
 libswresample/audioconvert.c             |   2 +
 libswresample/riscv/Makefile             |   2 +
 libswresample/riscv/audio_convert_init.c |  83 ++++++++++++++++
 libswresample/riscv/audio_convert_rvv.S  | 115 +++++++++++++++++++++++
 libswresample/swresample_internal.h      |   4 +
 5 files changed, 206 insertions(+)
 create mode 100644 libswresample/riscv/Makefile
 create mode 100644 libswresample/riscv/audio_convert_init.c
 create mode 100644 libswresample/riscv/audio_convert_rvv.S

diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
index f8bac98ca5..c257205f88 100644
--- a/libswresample/audioconvert.c
+++ b/libswresample/audioconvert.c
@@ -182,6 +182,8 @@ AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat 
out_fmt,
     swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);
 #elif ARCH_AARCH64
     swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);
+#elif ARCH_RISCV
+    swri_audio_convert_init_riscv(ctx, out_fmt, in_fmt, channels);
 #endif
 
     return ctx;
diff --git a/libswresample/riscv/Makefile b/libswresample/riscv/Makefile
new file mode 100644
index 0000000000..02a9db0c99
--- /dev/null
+++ b/libswresample/riscv/Makefile
@@ -0,0 +1,2 @@
+# C-side dispatch code, added to the plain OBJS list.
+# NOTE(review): presumably this object is built for every RISC-V target, so it
+# must link even when RVV is disabled - confirm against the build system.
+OBJS += riscv/audio_convert_init.o
+# Vector assembly, added to the RVV-specific object list.
+# NOTE(review): presumably only built when RVV is enabled at configure time.
+RVV-OBJS += riscv/audio_convert_rvv.o
diff --git a/libswresample/riscv/audio_convert_init.c 
b/libswresample/riscv/audio_convert_init.c
new file mode 100644
index 0000000000..d3b4cf777c
--- /dev/null
+++ b/libswresample/riscv/audio_convert_init.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2026 Felix Gong
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample/swresample_internal.h"
+#include "libswresample/audioconvert.h"
+
+#if HAVE_RVV
+
+/*
+ * Entry points implemented in audio_convert_rvv.S.
+ *
+ * dst      destination int16_t samples
+ * src      packed float samples (flt_to_s16) or an array of per-channel
+ *          float plane pointers (fltp variants)
+ * len      sample count handed through unchanged by the wrappers below
+ * channels number of planes in src; the 2-channel wrapper always passes 2
+ */
+void swri_oldapi_conv_flt_to_s16_rvv(int16_t *dst, const float *src, int len);
+void swri_oldapi_conv_fltp_to_s16_2ch_rvv(int16_t *dst, float *const *src,
+                                           int len, int channels);
+void swri_oldapi_conv_fltp_to_s16_nch_rvv(int16_t *dst, float *const *src,
+                                           int len, int channels);
+
+/* simd_f adapter: convert len floats from src[0] into int16_t at dst[0]. */
+static void conv_flt_to_s16_rvv(uint8_t **dst, const uint8_t **src, int len)
+{
+    int16_t     *out = (int16_t *)dst[0];
+    const float *in  = (const float *)src[0];
+
+    swri_oldapi_conv_flt_to_s16_rvv(out, in, len);
+}
+
+/*
+ * simd_f adapter for exactly two float planes: src is passed on as the plane
+ * pointer array and dst[0] receives the int16_t output.
+ */
+static void conv_fltp_to_s16_2ch_rvv(uint8_t **dst, const uint8_t **src,
+                                     int len)
+{
+    int16_t      *out    = (int16_t *)dst[0];
+    float *const *planes = (float *const *)src;
+
+    swri_oldapi_conv_fltp_to_s16_2ch_rvv(out, planes, len, 2);
+}
+
+/*
+ * simd_f adapter for more than two float planes.
+ *
+ * The callback signature carries no channel count, so it is recovered by
+ * scanning src from index 3 until a NULL plane pointer or SWR_CH_MAX is hit.
+ * Entries 0..2 are not checked here.
+ */
+static void conv_fltp_to_s16_nch_rvv(uint8_t **dst, const uint8_t **src,
+                                     int len)
+{
+    int16_t      *out      = (int16_t *)dst[0];
+    float *const *planes   = (float *const *)src;
+    int           channels = 3;
+
+    while (channels < SWR_CH_MAX && src[channels])
+        channels++;
+
+    swri_oldapi_conv_fltp_to_s16_nch_rvv(out, planes, len, channels);
+}
+
+#endif /* HAVE_RVV */
+
+/**
+ * Install RVV sample-format converters into an AudioConvert context.
+ *
+ * This function is defined unconditionally because it is called for every
+ * RISC-V build; only the RVV dispatch is compiled conditionally. Without
+ * HAVE_RVV, or when the CPU lacks AV_CPU_FLAG_RVV_F32, simd_f is left NULL.
+ *
+ * @param ac       context whose simd_f and alignment masks are set
+ * @param out_fmt  destination sample format
+ * @param in_fmt   source sample format
+ * @param channels channel count, used to pick the 2-channel or N-channel
+ *                 planar-to-interleaved converter
+ */
+av_cold void swri_audio_convert_init_riscv(struct AudioConvert *ac,
+                                           enum AVSampleFormat out_fmt,
+                                           enum AVSampleFormat in_fmt,
+                                           int channels)
+{
+    ac->simd_f = NULL;
+
+#if HAVE_RVV
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_RVV_F32))
+        return;
+
+    if ((out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT) ||
+        (out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)) {
+        /* Same layout on both sides: plain element-wise conversion. */
+        ac->simd_f = conv_flt_to_s16_rvv;
+    } else if (out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP) {
+        /* Planar input, interleaved output; mono is left without a SIMD path. */
+        if (channels == 2)
+            ac->simd_f = conv_fltp_to_s16_2ch_rvv;
+        else if (channels > 2)
+            ac->simd_f = conv_fltp_to_s16_nch_rvv;
+    }
+
+    /* NOTE(review): the vector loads/stores used only need element alignment;
+     * the 16-byte mask is kept as submitted - confirm whether it can be relaxed. */
+    if (ac->simd_f)
+        ac->in_simd_align_mask = ac->out_simd_align_mask = 15;
+#endif /* HAVE_RVV */
+}
diff --git a/libswresample/riscv/audio_convert_rvv.S 
b/libswresample/riscv/audio_convert_rvv.S
new file mode 100644
index 0000000000..3628648bdd
--- /dev/null
+++ b/libswresample/riscv/audio_convert_rvv.S
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2026 Felix Gong
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+// void swri_oldapi_conv_flt_to_s16_rvv(int16_t *dst, const float *src, int len)
+//
+// Converts len packed floats to signed 16-bit samples.
+// Each sample is multiplied by 2^15, converted to a 32-bit integer using the
+// dynamic rounding mode (frm) and then narrowed to 16 bits with saturation.
+// Without the scaling, normalised input would collapse to -1/0/1.
+//   a0 = dst, a1 = src, a2 = len
+func swri_oldapi_conv_flt_to_s16_rvv, zve32f
+    lpad    0
+    li      t1, 1 << 15
+    fcvt.s.w ft0, t1                    // ft0 = 32768.0f (exact)
+1:
+    vsetvli t0, a2, e32, m8, ta, ma
+    vle32.v v0, (a1)
+    sub     a2, a2, t0
+    slli    t1, t0, 2                   // bytes consumed from src
+    vfmul.vf v0, v0, ft0
+    add     a1, a1, t1
+    vfcvt.x.f.v v0, v0                  // float -> int32, saturating
+    // e32/m8 -> e16/m4 keeps the SEW/LMUL ratio, so vl is preserved.
+    vsetvli zero, zero, e16, m4, ta, ma
+    vnclip.wi v8, v0, 0                 // int32 -> int16, saturating
+    slli    t1, t0, 1                   // bytes produced into dst
+    vse16.v v8, (a0)
+    add     a0, a0, t1
+    bnez    a2, 1b
+    ret
+endfunc
+
+// void swri_oldapi_conv_fltp_to_s16_2ch_rvv(int16_t *dst, float *const *src,
+//                                            int len, int channels)
+//
+// Converts two float planes (src[0], src[1]) of len samples each into
+// interleaved signed 16-bit output: dst = L0 R0 L1 R1 ...
+// Samples are scaled by 2^15, rounded with the dynamic rounding mode (frm)
+// and saturated. The channels argument (a3) is not read.
+// The plane pointers are fetched with ld, i.e. as 64-bit values.
+//   a0 = dst, a1 = src, a2 = len
+func swri_oldapi_conv_fltp_to_s16_2ch_rvv, zve32f
+    lpad    0
+    ld      a4, 0(a1)                   // plane 0
+    ld      a5, 8(a1)                   // plane 1
+    li      t1, 1 << 15
+    fcvt.s.w ft0, t1                    // ft0 = 32768.0f (exact)
+1:
+    vsetvli t0, a2, e32, m4, ta, ma
+    vle32.v v0, (a4)
+    vle32.v v4, (a5)
+    sub     a2, a2, t0
+    slli    t1, t0, 2                   // 4 * vl: bytes read per plane
+    vfmul.vf v0, v0, ft0
+    vfmul.vf v4, v4, ft0
+    add     a4, a4, t1
+    add     a5, a5, t1
+    vfcvt.x.f.v v0, v0
+    vfcvt.x.f.v v4, v4
+    // e32/m4 -> e16/m2 keeps the SEW/LMUL ratio, so vl is preserved.
+    vsetvli zero, zero, e16, m2, ta, ma
+    vnclip.wi v16, v0, 0                // field 0: v16-v17
+    vnclip.wi v18, v4, 0                // field 1: v18-v19
+    // Two-field segment store interleaves the fields element by element.
+    vsseg2e16.v v16, (a0)
+    add     a0, a0, t1                  // vl frames * 2 ch * 2 bytes = 4 * vl
+    bnez    a2, 1b
+    ret
+endfunc
+
+// void swri_oldapi_conv_fltp_to_s16_nch_rvv(int16_t *dst, float *const *src,
+//                                            int len, int channels)
+//
+// Converts `channels` float planes of len samples each into interleaved
+// signed 16-bit output. Planes are processed one at a time: plane c is
+// written with a strided store starting at dst + c, using a stride of
+// channels * sizeof(int16_t) bytes, so every sample lands in its own slot.
+// Samples are scaled by 2^15, rounded with the dynamic rounding mode (frm)
+// and saturated. Returns immediately when channels <= 0.
+// The plane pointers are fetched with ld, i.e. as 64-bit values.
+// Only caller-saved registers are used, so no stack frame is needed.
+//   a0 = dst, a1 = src, a2 = len, a3 = channels
+func swri_oldapi_conv_fltp_to_s16_nch_rvv, zve32f
+    lpad    0
+    blez    a3, 3f
+    li      t1, 1 << 15
+    fcvt.s.w ft0, t1                    // ft0 = 32768.0f (exact)
+    slli    t2, a3, 1                   // byte stride between output frames
+1:
+    ld      a4, 0(a1)                   // current input plane
+    addi    a1, a1, 8
+    mv      a5, a0                      // output cursor for this channel
+    mv      a6, a2                      // samples left in this plane
+2:
+    vsetvli t0, a6, e32, m8, ta, ma
+    vle32.v v0, (a4)
+    sub     a6, a6, t0
+    slli    t1, t0, 2                   // bytes consumed from the plane
+    vfmul.vf v0, v0, ft0
+    add     a4, a4, t1
+    vfcvt.x.f.v v0, v0                  // float -> int32, saturating
+    // e32/m8 -> e16/m4 keeps the SEW/LMUL ratio, so vl is preserved.
+    vsetvli zero, zero, e16, m4, ta, ma
+    vnclip.wi v8, v0, 0                 // int32 -> int16, saturating
+    mul     t1, t0, t2                  // bytes spanned by vl output frames
+    vsse16.v v8, (a5), t2
+    add     a5, a5, t1
+    bnez    a6, 2b
+    addi    a3, a3, -1
+    addi    a0, a0, 2                   // next channel's slot in frame 0
+    bnez    a3, 1b
+3:
+    ret
+endfunc
diff --git a/libswresample/swresample_internal.h 
b/libswresample/swresample_internal.h
index ca2e0d7534..63a4d78aba 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -228,5 +228,9 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                  enum AVSampleFormat out_fmt,
                                  enum AVSampleFormat in_fmt,
                                  int channels);
+/**
+ * RISC-V hook for installing optimized sample-format converters into ac.
+ */
+void swri_audio_convert_init_riscv(struct AudioConvert *ac,
+                                   enum AVSampleFormat out_fmt,
+                                   enum AVSampleFormat in_fmt,
+                                   int channels);
 
 #endif
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to