Steve Lhomme pushed to branch master at VideoLAN / VLC


Commits:
6cccfb3a by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
riscv: add macros for function boilerplate

- - - - -
f74d450a by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
riscv: add forward-edge CFI landing pads

- - - - -
67e1e0d3 by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
cpu: add RISC-V B Bit manipulation extension

- - - - -
6055c11b by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
cpu: run-time detection for RISC-V B

- - - - -
5e5007a9 by Rémi Denis-Courmont at 2025-10-15T05:28:28+00:00
rvv: use Zba SHxADD where applicable

I don't know of any hardware that would support the Vector extension and
yet would not support the Bit-manip extension (Zba + Zbb + Zbs), so this
should be fine.

- - - - -


12 changed files:

- include/vlc_cpu.h
- modules/isa/riscv/Makefile.am
- modules/isa/riscv/deinterlace.c
- + modules/isa/riscv/macros.S
- modules/isa/riscv/mixer.c
- modules/isa/riscv/rvv_amplify.S
- modules/isa/riscv/rvv_merge.S
- modules/isa/riscv/rvv_transform.S
- src/freebsd/cpu.c
- src/linux/cpu.c
- src/misc/cpu.c
- src/openbsd/cpu.c


Changes:

=====================================
include/vlc_cpu.h
=====================================
@@ -163,6 +163,7 @@ unsigned vlc_CPU_raw(void);
 #   define HAVE_FPU 1
 #  endif
 #  define VLC_CPU_RV_V 0x1
+#  define VLC_CPU_RV_B 0x2
 
 #  ifdef __riscv_v
 #   define vlc_CPU_RV_V() (1)
@@ -170,6 +171,13 @@ unsigned vlc_CPU_raw(void);
 #   define vlc_CPU_RV_V() ((vlc_CPU() & VLC_CPU_RV_V) != 0)
 #  endif
 
+#  if (defined (__riscv_b) || (defined (__riscv_zba) && defined (__riscv_zbb) \
+                            && defined (__riscv_zbs)))
+#   define vlc_CPU_RV_B() (1)
+#  else
+#   define vlc_CPU_RV_B() ((vlc_CPU() & VLC_CPU_RV_B) != 0)
+#  endif
+
 # else
 /**
  * Are single precision floating point operations "fast"?


=====================================
modules/isa/riscv/Makefile.am
=====================================
@@ -8,6 +8,8 @@ libtransform_rvv_plugin_la_SOURCES = \
 libvolume_rvv_plugin_la_SOURCES = isa/riscv/mixer.c isa/riscv/rvv_amplify.S
 libvolume_rvv_plugin_la_LIBADD = $(AM_LIBADD) $(LIBM)
 
+EXTRA_DIST += isa/riscv/macros.S
+
 if HAVE_RVV
 riscv_LTLIBRARIES = \
        libdeinterlace_rvv_plugin.la \


=====================================
modules/isa/riscv/deinterlace.c
=====================================
@@ -36,7 +36,9 @@ static void Probe(void *data)
         struct deinterlace_functions *const f = data;
 
         f->merges[0] = merge8_rvv;
-        f->merges[1] = merge16_rvv;
+
+        if (vlc_CPU_RV_B())
+            f->merges[1] = merge16_rvv;
     }
 }
 


=====================================
modules/isa/riscv/macros.S
=====================================
@@ -0,0 +1,40 @@
+/*****************************************************************************
+ * macros.S: RISC-V common assembler macros
+ ******************************************************************************
+ * Copyright (C) 2022 Rémi Denis-Courmont
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+.macro func sym
+       .text
+       .global \sym
+       .hidden \sym
+       .type   \sym, %function
+       .align  2
+\sym:
+
+       .macro  endfunc
+               .size   \sym, . - \sym
+               .previous
+               .purgem endfunc
+       .endm
+.endm
+
+#if !defined (__riscv_zicfilp)
+.macro lpad    lpl
+       auipc   zero, \lpl
+.endm
+#endif


=====================================
modules/isa/riscv/mixer.c
=====================================
@@ -95,7 +95,7 @@ static int Probe(vlc_object_t *obj)
 {
     audio_volume_t *volume = (audio_volume_t *)obj;
 
-    if (!vlc_CPU_RV_V())
+    if (!vlc_CPU_RV_V() || !vlc_CPU_RV_B())
         return VLC_ENOTSUP;
 
     switch (volume->format) {


=====================================
modules/isa/riscv/rvv_amplify.S
=====================================
@@ -18,80 +18,84 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
-       .option arch, +v
-       .text
-       .align 2
+#include "macros.S"
 
-       .globl  rvv_amplify_f32
-       .type   rvv_amplify_f32, %function
-rvv_amplify_f32:
+       .option arch, +b, +v
+
+func   rvv_amplify_f32
+       lpad    0
        srli    a2, a2, 2
 #if defined (__riscv_float_abi_soft)
        fmv.w.x fa0, a3
 #endif
-1:     vsetvli t0, a2, e32, m8, ta, ma
-       slli    t1, t0, 2
+1:
+       vsetvli t0, a2, e32, m8, ta, ma
        vle32.v v16, (a1)
-       add     a1, a1, t1
+       sh2add  a1, t0, a1
        vfmul.vf        v16, v16, fa0
        sub     a2, a2, t0
        vse32.v v16, (a0)
-       add     a0, a0, t1
+       sh2add  a0, t0, a0
        bnez    a2, 1b
+
        ret
+endfunc
 
-       .globl  rvv_amplify_f64
-       .type   rvv_amplify_f64, %function
-rvv_amplify_f64:
+func   rvv_amplify_f64
+       lpad    0
        srli    a2, a2, 3
 #if defined (__riscv_float_abi_soft) || defined (__riscv_float_abi_single)
        fmv.d.x fa0, a3
 #endif
-1:     vsetvli t0, a2, e64, m8, ta, ma
-       slli    t1, t0, 3
+1:
+       vsetvli t0, a2, e64, m8, ta, ma
        vle64.v v16, (a1)
-       add     a1, a1, t1
+       sh3add  a1, t0, a1
        vfmul.vf        v16, v16, fa0
        sub     a2, a2, t0
        vse64.v v16, (a0)
-       add     a0, a0, t1
+       sh3add  a0, t0, a0
        bnez    a2, 1b
+
        ret
+endfunc
 
-       .globl  rvv_amplify_i16
-       .type   rvv_amplify_i16, %function
-rvv_amplify_i16:
+func   rvv_amplify_i16
+       lpad    0
        srli    a2, a2, 1
-1:     vsetvli t0, a2, e16, m8, ta, ma
-       slli    t1, t0, 1
+1:
+       vsetvli t0, a2, e16, m8, ta, ma
        vle16.v v16, (a1)
-       add     a1, a1, t1
+       sh1add  a1, t0, a1
        vmulhsu.vx      v16, v16, a3
        sub     a2, a2, t0
        vse16.v v16, (a0)
-       add     a0, a0, t1
+       sh1add  a0, t0, a0
        bnez    a2, 1b
+
        ret
+endfunc
 
-       .globl  rvv_amplify_i32
-       .type   rvv_amplify_i32, %function
-rvv_amplify_i32:
+func   rvv_amplify_i32
+       lpad    0
        srli    a2, a2, 2
-1:     vsetvli t0, a2, e32, m8, ta, ma
-       slli    t1, t0, 2
+1:
+       vsetvli t0, a2, e32, m8, ta, ma
        vle32.v v16, (a1)
-       add     a1, a1, t1
+       sh2add  a1, t0, a1
        vmulhsu.vx      v16, v16, a3
        sub     a2, a2, t0
        vse32.v v16, (a0)
-       add     a0, a0, t1
+       sh2add  a0, t0, a0
        bnez    a2, 1b
+
        ret
+endfunc
 
-       .globl  rvv_amplify_u8
-       .type   rvv_amplify_u8, %function
-rvv_amplify_u8:
-1:     vsetvli t0, a2, e8, m8, ta, ma
+func   rvv_amplify_u8
+       lpad    0
+1:
+       vsetvli t0, a2, e8, m8, ta, ma
        vle8.v  v16, (a1)
        add     a1, a1, t0
        vmulhu.vx       v16, v16, a3
@@ -99,5 +103,6 @@ rvv_amplify_u8:
        vse8.v  v16, (a0)
        add     a0, a0, t0
        bnez    a2, 1b
-       ret
 
+       ret
+endfunc


=====================================
modules/isa/riscv/rvv_merge.S
=====================================
@@ -18,15 +18,15 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
-       .option arch, +v
-       .text
-       .align  2
+#include "macros.S"
 
-       .globl  merge8_rvv
-       .type   merge8_rvv, %function
-merge8_rvv:
+       .option arch, +b, +v
+
+func   merge8_rvv
+       lpad    0
        csrwi   vxrm, 0
-1:     vsetvli t0, a3, e8, m8, ta, ma
+1:
+       vsetvli t0, a3, e8, m8, ta, ma
        vle8.v  v16, (a1)
        add     a1, a1, t0
        vle8.v  v24, (a2)
@@ -36,25 +36,25 @@ merge8_rvv:
        vse8.v  v16, (a0)
        add     a0, a0, t0
        bnez    a3, 1b
+
        ret
-       .size   merge8_rvv, . - merge8_rvv
+endfunc
 
-       .globl  merge16_rvv
-       .type   merge16_rvv, %function
-merge16_rvv:
+func   merge16_rvv
+       lpad    0
        csrwi   vxrm, 0
        srli    a3, a3, 1
-1:     vsetvli t0, a3, e16, m8, ta, ma
-       slli    t1, t0, 1
+1:
+       vsetvli t0, a3, e16, m8, ta, ma
        vle16.v v16, (a1)
-       add     a1, a1, t1
+       sh1add  a1, t0, a1
        vle16.v v24, (a2)
-       add     a2, a2, t1
+       sh1add  a2, t0, a2
        vaaddu.vv       v16, v16, v24
        sub     a3, a3, t0
        vse16.v v16, (a0)
-       add     a0, a0, t1
+       sh1add  a0, t0, a0
        bnez    a3, 1b
-       ret
-       .size   merge16_rvv, . - merge16_rvv
 
+       ret
+endfunc


=====================================
modules/isa/riscv/rvv_transform.S
=====================================
@@ -18,20 +18,19 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
+#include "macros.S"
+
        .option arch, +v
-       .text
-       .align  2
 
        .macro transforms, bits, order
        .if     \bits - (8 << \order)
        .error  "Mismatched parameters"
        .endif
 
-       .globl  rvv_hflip_\bits
-       .type   rvv_hflip_\bits, %function
+func   rvv_hflip_\bits
        // a0:out_base, a1:out_stride, a2:in_base, a3:in_stride
        // a4:width, a5:height
-rvv_hflip_\bits :
+       lpad    0
        .if     \order
        slli    t4, a4, \order
        add     a2, a2, t4
@@ -40,11 +39,12 @@ rvv_hflip_\bits :
        .endif
        li      t6, -(1 << \order)
        add     a2, a2, t6
-
-1:     mv      t0, a0
+1:
+       mv      t0, a0
        mv      t2, a2
        mv      t4, a4
-2:     vsetvli t5, t4, e\bits, m8, ta, ma
+2:
+       vsetvli t5, t4, e\bits, m8, ta, ma
        sub     t4, t4, t5
        vlse\bits\().v  v0, (t2), t6
        .if     \order
@@ -59,20 +59,22 @@ rvv_hflip_\bits :
        add     a0, a0, a1
        add     a2, a2, a3
        bnez    a5, 1b
+
        ret
-       .size   rvv_hflip_\bits, . - rvv_hflip_\bits
+endfunc
 
-       .globl  rvv_transpose_\bits
-       .type   rvv_transpose_\bits, %function
+func   rvv_transpose_\bits
        // a0:out_base, a1:out_stride, a2:in_base, a3:in_stride
        // a4:in_width/out_height, a5:in_height/out_width
-rvv_transpose_\bits :
-1:     mv      t0, a0
+       lpad    0
+1:
+       mv      t0, a0
        mv      t2, a2
        mv      t4, a4
        /* For the sake of locality, the inner loop transposes VL rows at once
         * rather than one column. */
-2:     vsetvli t5, a5, e\bits, m8, ta, ma
+2:
+       vsetvli t5, a5, e\bits, m8, ta, ma
        vlse\bits\().v  v0, (t2), a3
        addi    t2, t2, (1 << \order)
        vse\bits\().v   v0, (t0)
@@ -88,9 +90,9 @@ rvv_transpose_\bits :
        add     a0, a0, t5      // VL output columns done
        add     a2, a2, t3      // VL input rows done
        bnez    a5, 1b
-       ret
-       .size   rvv_transpose_\bits, . - rvv_transpose_\bits
 
+       ret
+endfunc
        .endm // transforms
 
        transforms       8, 0


=====================================
src/freebsd/cpu.c
=====================================
@@ -90,6 +90,8 @@ unsigned vlc_CPU_raw(void)
 
     elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
 
+    if (hwcap & HWCAP_RV('B'))
+        flags |= VLC_CPU_RV_B;
     if (hwcap & HWCAP_RV('V'))
         flags |= VLC_CPU_RV_V;
 


=====================================
src/linux/cpu.c
=====================================
@@ -90,6 +90,8 @@ unsigned vlc_CPU_raw(void)
     const unsigned long hwcap = getauxval(AT_HWCAP);
     unsigned int flags = 0;
 
+    if (hwcap & HWCAP_RV('B'))
+        flags |= VLC_CPU_RV_B;
     if (hwcap & HWCAP_RV('V'))
         flags |= VLC_CPU_RV_V;
 


=====================================
src/misc/cpu.c
=====================================
@@ -186,6 +186,8 @@ void vlc_CPU_dump (vlc_object_t *obj)
         vlc_memstream_puts(&stream, "ARM_NEON ");
 
 #elif defined (__riscv)
+    if (vlc_CPU_RV_B())
+        vlc_memstream_puts(&stream, "B ");
     if (vlc_CPU_RV_V())
         vlc_memstream_puts(&stream, "V ");
 


=====================================
src/openbsd/cpu.c
=====================================
@@ -87,6 +87,8 @@ unsigned vlc_CPU_raw(void)
 
     elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
 
+    if (hwcap & HWCAP_RV('B'))
+        flags |= VLC_CPU_RV_B;
     if (hwcap & HWCAP_RV('V'))
         flags |= VLC_CPU_RV_V;
 



View it on GitLab: 
https://code.videolan.org/videolan/vlc/-/compare/851e8e217ae157130e8d30b5e1d68f28baf5b4b4...5e5007a9c31af2a8a81073f972abb7601c253424

-- 
View it on GitLab: 
https://code.videolan.org/videolan/vlc/-/compare/851e8e217ae157130e8d30b5e1d68f28baf5b4b4...5e5007a9c31af2a8a81073f972abb7601c253424
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance
_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits

Reply via email to