PR #21324 opened by Rémi Denis-Courmont (Courmisch) URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21324 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21324.patch
From a665a59e8aa0d25eccc2758a18b5ef1ab2783d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]> Date: Mon, 22 Dec 2025 22:42:47 +0200 Subject: [PATCH 1/5] checkasm/riscv: factor out the vector clobbers No functional changes. --- tests/checkasm/riscv/checkasm.S | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index c4b034ae23..5283596575 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -79,20 +79,7 @@ func checkasm_get_wrapper, v .align 2 2: /* <-- Entry point with the Vector extension --> */ lpad 0 - /* Clobber the vectors */ - vsetvli t0, zero, e32, m8, ta, ma - li t0, 0xdeadbeef - vmv.v.x v0, t0 - vmv.v.x v8, t0 - vmv.v.x v16, t0 - vmv.v.x v24, t0 - - /* Clobber the vector configuration */ - li t0, 0 /* Vector length: zero */ - li t2, -4 /* Vector type: illegal */ - vsetvl zero, t0, t2 - csrwi vxrm, 3 /* Rounding mode: round-to-odd */ - csrwi vxsat, 1 /* Saturation: encountered */ + jal t0, .Lclobber_v .align 2 3: /* <-- Entry point without the Vector extension --> */ @@ -181,5 +168,21 @@ func checkasm_get_wrapper, v lla a0, fail_fs_reg call checkasm_fail_func j 4b + +.Lclobber_v: + # Clobber the vector registers + vsetvli t1, zero, e32, m8, ta, ma + li t1, -0xdeadbeef + vmv.v.x v0, t1 + vmv.v.x v8, t1 + vmv.v.x v16, t1 + vmv.v.x v24, t1 + # Clobber the vector configuration + li t1, 0 /* Vector length: zero */ + li t3, -4 /* Vector type: illegal */ + vsetvl zero, t1, t3 + csrwi vxrm, 3 /* Rounding mode: round-to-odd */ + csrwi vxsat, 1 /* Saturation: encountered */ + jr t0 endfunc #endif -- 2.49.1 From 86a8944491729939c352713d32a700a8a25ba96b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]> Date: Mon, 22 Dec 2025 22:47:46 +0200 Subject: [PATCH 2/5] checkasm/riscv: restore stack before failure reporting The failure handling code is C 
and requires correct stack, global and thread pointers. This restores them before returning to C. At the same time, we no longer need to abort() afterwards. --- tests/checkasm/riscv/checkasm.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index 5283596575..adbf0c0b10 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -155,9 +155,13 @@ func checkasm_get_wrapper, v ret 5: + # checkasm_fail_func() needs valid SP, GP and TP. Restore them. + ld sp, 8(t0) + ld gp, 16(t0) + ld tp, 24(t0) lla a0, fail_rsvd_reg call checkasm_fail_func - tail abort /* The test harness would probably crash anyway */ + j 4b 6: lla a0, fail_s_reg -- 2.49.1 From 810b6761425dc46f04cc36ab3733819772b04363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]> Date: Mon, 22 Dec 2025 22:58:08 +0200 Subject: [PATCH 3/5] checkasm/riscv: print clobbered register name checkasm_fail_func takes a format string, so we can. 
--- tests/checkasm/riscv/checkasm.S | 53 +++++++++++++++++---------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index adbf0c0b10..1b8fdfe51a 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -22,18 +22,6 @@ #if (__riscv_xlen == 64) -const fail_s_reg - .asciz "callee-saved integer register clobbered" -endconst - -const fail_fs_reg - .asciz "callee-saved floating-point register clobbered" -endconst - -const fail_rsvd_reg - .asciz "unallocatable register clobbered" -endconst - .section .tbss, "waT" .align 3 .hidden checked_func @@ -75,6 +63,15 @@ func checkasm_get_wrapper, v addi sp, sp, 16 ret + .pushsection ".rodata", "a" +.Lfail_s_reg: + .asciz "callee-saved integer register S%d clobbered" +.Lfail_fs_reg: + .asciz "callee-saved floating-point register FS%d clobbered" +.Lfail_rsvd_reg: + .asciz "unallocatable register %cP clobbered" + .popsection + .option norvc .align 2 2: /* <-- Entry point with the Vector extension --> */ @@ -125,21 +122,25 @@ func checkasm_get_wrapper, v /* Check special register values */ la.tls.ie t0, saved_regs add t0, tp, t0 - ld t1, 8(t0) - bne t1, sp, 5f - ld t1, 16(t0) - bne t1, gp, 5f - ld t1, 24(t0) // If TP was corrupted, we probably will have... - bne t1, tp, 5f // ...already crashed before we even get here. + ld t2, 8(t0) // SP + ld t3, 16(t0) // GP + ld t4, 24(t0) // TP + li a1, 'S' + bne t2, sp, .Lfail_xp + li a1, 'G' + bne t3, gp, .Lfail_xp + li a1, 'T' + bne t4, tp, .Lfail_xp /* Check value of saved registers */ li t0, 0xdeadbeef1badf00d .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - bne t0, s\n, 6f + li a1, \n + bne t0, s\n, .Lfail_s #ifdef __riscv_float_abi_double /* TODO: check float ABI single too */ fmv.x.d t1, fs\n - bne t0, t1, 7f + bne t0, t1, .Lfail_fs #endif .endr @@ -154,22 +155,22 @@ func checkasm_get_wrapper, v .endr ret -5: +.Lfail_xp: # checkasm_fail_func() needs valid SP, GP and TP. 
Restore them. ld sp, 8(t0) ld gp, 16(t0) ld tp, 24(t0) - lla a0, fail_rsvd_reg + lla a0, .Lfail_rsvd_reg call checkasm_fail_func j 4b -6: - lla a0, fail_s_reg +.Lfail_s: + lla a0, .Lfail_s_reg call checkasm_fail_func j 4b -7: - lla a0, fail_fs_reg +.Lfail_fs: + lla a0, .Lfail_fs_reg call checkasm_fail_func j 4b -- 2.49.1 From 494493e729b8ece54f54ecca8726aedb0da8578b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]> Date: Tue, 30 Dec 2025 21:08:23 +0200 Subject: [PATCH 4/5] checkasm/riscv: handle other float ABIs than double This splits out the integer and floating point handling to add support for software, single and quad float ABIs. --- tests/checkasm/checkasm.h | 2 +- tests/checkasm/riscv/checkasm.S | 179 ++++++++++++++++++++++++++------ 2 files changed, 147 insertions(+), 34 deletions(-) diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index a54231dd0d..90aeee27b7 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -301,7 +301,7 @@ void checkasm_checked_call(void *func, ...); void checkasm_set_function(void *); void *checkasm_get_wrapper(void); -#if HAVE_RV && (__riscv_xlen == 64) && defined (__riscv_d) +#if HAVE_RV && (__riscv_xlen == 64) #define declare_new(ret, ...) \ ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper(); #define call_new(...) 
\ diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index 1b8fdfe51a..84c081cbfb 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -20,21 +20,57 @@ #include "libavutil/riscv/asm.S" +#if defined(__riscv_float_abi_soft) +.macro flf rd, addr +.endm +.macro fsf rs, addr +.endm +#define FSZ 0 +#elif defined(__riscv_float_abi_single) +.macro flf rd, addr + flw \rd, \addr +.endm +.macro fsf rs, addr + fsw \rs, \addr +.endm +#define FSZ 4 +#elif defined(__riscv_float_abi_double) +.macro flf rd, addr + fld \rd, \addr +.endm +.macro fsf rs, addr + fsd \rs, \addr +.endm +#define FSZ 8 +#elif defined(__riscv_float_abi_quad) +.macro flf rd, addr + flq \rd, \addr +.endm +.macro fsf rs, addr + fsq \rs, \addr +.endm +#define FSZ 16 +#else +#error "Unknown float ABI" +#endif + #if (__riscv_xlen == 64) - .section .tbss, "waT" + .pushsection .tbss, "waT" .align 3 .hidden checked_func - .hidden saved_regs checked_func: .quad 0 -saved_regs: - /* Space to spill RA, SP, GP, TP, S0-S11 and FS0-FS11 */ - .rept 4 + 12 + 12 - .quad 0 - .endr + .align 3 +.Lsaved_xregs: + .fill 4 + 12, 8, 0 // RA, SP, GP, TP, S0-S11 + .align 4 +.Lsaved_fregs: + .fill 12, FSZ, 0 // FS0-FS11 + .fill 1, 8, 0 // RA + .popsection func checkasm_set_function lpad 0 @@ -53,15 +89,23 @@ func checkasm_get_wrapper, v call av_get_cpu_flags andi t0, a0, 8 /* AV_CPU_FLAG_RVV_I32 */ - - lla a0, 3f +#ifdef __riscv_float_abi_soft + andi t1, a0, 16 /* AV_CPU_FLAG_RVV_F32 (implies F and Zve32x) */ + lla a0, checkasm_checked_call_i beqz t0, 1f - lla a0, 2f + lla a0, checkasm_checked_call_iv + beqz t1, 1f +#else + lla a0, checkasm_checked_call_if + beqz t0, 1f +#endif + lla a0, checkasm_checked_call_ifv 1: ld ra, 8(sp) ld fp, (sp) addi sp, sp, 16 ret +endfunc .pushsection ".rodata", "a" .Lfail_s_reg: @@ -70,27 +114,33 @@ func checkasm_get_wrapper, v .asciz "callee-saved floating-point register FS%d clobbered" .Lfail_rsvd_reg: .asciz "unallocatable register %cP 
clobbered" +#if defined(__riscv_float_abi_soft) || defined(__riscv_float_abi_single) + .align 2 +.Lbad_float: + .single 123456789 +#elif defined(__riscv_float_abi_double) + .align 3 +.Lbad_float: + .double 123456789 +#elif defined(__riscv_float_abi_quad) + .align 4 +.Lbad_float: + .ldouble 123456789 +#endif .popsection - .option norvc - .align 2 -2: /* <-- Entry point with the Vector extension --> */ - lpad 0 - jal t0, .Lclobber_v - - .align 2 -3: /* <-- Entry point without the Vector extension --> */ +func checkasm_checked_call_i + /* <-- Entry point without the Vector extension --> */ lpad 0 /* Save RA, unallocatable and callee-saved registers */ - la.tls.ie t0, saved_regs + la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 sd ra, (t0) sd sp, 8(t0) sd gp, 16(t0) sd tp, 24(t0) .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - sd s\n, (32 + (16 * \n))(t0) - fsd fs\n, (40 + (16 * \n))(t0) + sd s\n, (32 + (8 * \n))(t0) .endr /* Clobber the stack space right below SP */ @@ -107,9 +157,7 @@ func checkasm_get_wrapper, v .if (\n > 1 && \n < 7) mv t\n, t0 .endif - fmv.d.x ft\n, t0 mv s\n, t0 - fmv.d.x fs\n, t0 .endr /* Call the tested function */ @@ -120,7 +168,7 @@ func checkasm_get_wrapper, v jalr t3 /* Check special register values */ - la.tls.ie t0, saved_regs + la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 ld t2, 8(t0) // SP ld t3, 16(t0) // GP @@ -137,21 +185,15 @@ func checkasm_get_wrapper, v .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 li a1, \n bne t0, s\n, .Lfail_s -#ifdef __riscv_float_abi_double - /* TODO: check float ABI single too */ - fmv.x.d t1, fs\n - bne t0, t1, .Lfail_fs -#endif .endr 4: /* Restore RA and saved registers */ - la.tls.ie t0, saved_regs + la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 ld ra, (t0) .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - ld s\n, (32 + (16 * \n))(t0) - fld fs\n, (40 + (16 * \n))(t0) + ld s\n, (32 + (8 * \n))(t0) .endr ret @@ -168,11 +210,82 @@ func checkasm_get_wrapper, v lla a0, .Lfail_s_reg call checkasm_fail_func j 4b +endfunc + 
+#ifndef __riscv_float_abi_soft +func checkasm_checked_call_if, f + lpad 0 + # Save callee-saved floating point registers and RA + la.tls.ie t0, .Lsaved_fregs + add t0, t0, tp + lla t1, .Lbad_float + sd ra, 12 * FSZ(t0) + .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + fsf fs\n, \n * FSZ(t0) + .endr + # Clobber the saved and temporary floating point registers + .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + flf ft\n, (t1) + flf fs\n, (t1) + .endr + + jal checkasm_checked_call_i + + # Check value of saved registers + lla t1, .Lbad_float + flf ft0, (t1) + .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + li a1, \n +#if defined(__riscv_float_abi_single) + feq.s t1, ft0, fs\n +#elif defined(__riscv_float_abi_double) + feq.d t1, ft0, fs\n +#else + feq.q t1, ft0, fs\n +#endif + beqz t1, .Lfail_fs + .endr + +1: # Restore callee-saved floating point registers and RA + la.tls.ie t0, .Lsaved_fregs + add t0, t0, tp + .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + flf fs\n, \n * FSZ(t0) + .endr + ld ra, 12 * FSZ(t0) + ret .Lfail_fs: lla a0, .Lfail_fs_reg call checkasm_fail_func - j 4b + j 1b +endfunc +#else +func checkasm_checked_call_if, f + lpad 0 + lla t1, .Lbad_float + # Clobber all floating point registers (soft float ABI). 
+ .irp n, 0, 1, 2, 3, 4, 5, 6, 7 + flw fa\n, (t1) + .endr + .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + flw ft\n, (t1) + flw fs\n, (t1) + .endr + j checkasm_checked_call_i +endfunc + +func checkasm_checked_call_iv, zve32x + lpad 0 + jal t0, .Lclobber_v + j checkasm_checked_call_i +endfunc +#endif + +func checkasm_checked_call_ifv, zve32x + lpad 0 + jal t0, .Lclobber_v + j checkasm_checked_call_if .Lclobber_v: # Clobber the vector registers -- 2.49.1 From 50108118944adae533b2b99dfc7548d5a7fa417d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <[email protected]> Date: Tue, 30 Dec 2025 21:26:46 +0200 Subject: [PATCH 5/5] checkasm/riscv: add call checks for riscv32 --- tests/checkasm/checkasm.h | 2 +- tests/checkasm/riscv/checkasm.S | 128 +++++++++++++++++++------------- 2 files changed, 79 insertions(+), 51 deletions(-) diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 90aeee27b7..4bd7f3ee0c 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -301,7 +301,7 @@ void checkasm_checked_call(void *func, ...); void checkasm_set_function(void *); void *checkasm_get_wrapper(void); -#if HAVE_RV && (__riscv_xlen == 64) +#if HAVE_RV #define declare_new(ret, ...) \ ret (*checked_call)(__VA_ARGS__) = checkasm_get_wrapper(); #define call_new(...) 
\ diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S index 84c081cbfb..c35c9f1113 100644 --- a/tests/checkasm/riscv/checkasm.S +++ b/tests/checkasm/riscv/checkasm.S @@ -20,6 +20,38 @@ #include "libavutil/riscv/asm.S" +#if (__riscv_xlen == 32) +.macro lx rd, addr + lw \rd, \addr +.endm + +.macro sx rs, addr + sw \rs, \addr +.endm +#define REG_MAGIC 0xdeadbeef +#elif (__riscv_xlen == 64) +.macro lx rd, addr + ld \rd, \addr +.endm + +.macro sx rs, addr + sd \rs, \addr +.endm +#define REG_MAGIC 0xdeadbeef0badf00d +#else +.macro lx rd, addr + lq \rd, \addr +.endm + +.macro sx rs, addr + sq \rs, \addr +.endm +#define REG_MAGIC 0xdeadbeef0badf00daaaabbbbccccdddd +#endif +#define XSZ (__riscv_xlen / 8) +#define STACK_ALIGN 16 +#define STACK_SPACE(sz) (((sz) + (STACK_ALIGN - 1)) & -STACK_ALIGN) + #if defined(__riscv_float_abi_soft) .macro flf rd, addr .endm @@ -54,38 +86,33 @@ #error "Unknown float ABI" #endif -#if (__riscv_xlen == 64) - .pushsection .tbss, "waT" - .align 3 - .hidden checked_func - -checked_func: - .quad 0 - - .align 3 + .align 4 +.Lchecked_func: + .fill 1, XSZ, 0 + .align 4 .Lsaved_xregs: - .fill 4 + 12, 8, 0 // RA, SP, GP, TP, S0-S11 + .fill 4 + 12, XSZ, 0 // RA, SP, GP, TP, S0-S11 .align 4 .Lsaved_fregs: .fill 12, FSZ, 0 // FS0-FS11 - .fill 1, 8, 0 // RA + .fill 1, XSZ, 0 // RA .popsection func checkasm_set_function lpad 0 - la.tls.ie t0, checked_func + la.tls.ie t0, .Lchecked_func add t0, tp, t0 - sd a0, (t0) + sx a0, (t0) ret endfunc func checkasm_get_wrapper, v lpad 0 - addi sp, sp, -16 - sd fp, (sp) - sd ra, 8(sp) - addi fp, sp, 16 + addi sp, sp, -STACK_SPACE(2 * XSZ) + sx fp, (sp) + sx ra, XSZ(sp) + addi fp, sp, STACK_SPACE(2 * XSZ) call av_get_cpu_flags andi t0, a0, 8 /* AV_CPU_FLAG_RVV_I32 */ @@ -101,8 +128,8 @@ func checkasm_get_wrapper, v #endif lla a0, checkasm_checked_call_ifv 1: - ld ra, 8(sp) - ld fp, (sp) + lx ra, XSZ(sp) + lx fp, (sp) addi sp, sp, 16 ret endfunc @@ -135,44 +162,46 @@ func checkasm_checked_call_i 
/* Save RA, unallocatable and callee-saved registers */ la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 - sd ra, (t0) - sd sp, 8(t0) - sd gp, 16(t0) - sd tp, 24(t0) + sx ra, (t0) + sx sp, 1 * XSZ(t0) + sx gp, 2 * XSZ(t0) + sx tp, 3 * XSZ(t0) .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - sd s\n, (32 + (8 * \n))(t0) + sx s\n, (4 + \n) * XSZ(t0) .endr /* Clobber the stack space right below SP */ - li t0, 0xdeadbeef1badf00d - .rept 16 - addi sp, sp, -16 - sd t0, (sp) - sd t0, 8(sp) - .endr - addi sp, sp, 256 + li t1, REG_MAGIC + li t0, 16 +1: + addi sp, sp, -XSZ + addi t0, t0, -1 + sx t1, (sp) + bnez t0, 1b - /* Clobber the saved and temporary registers */ + addi sp, sp, 16 * XSZ + # Clobber temporary registers (except T2, FE-CFI label) + .irp n, 0, 1, 3, 4, 5, 6 + mv t\n, t1 + .endr + # Clobber the saved registers .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - .if (\n > 1 && \n < 7) - mv t\n, t0 - .endif - mv s\n, t0 + mv s\n, t1 .endr /* Call the tested function */ - la.tls.ie t0, checked_func + la.tls.ie t0, .Lchecked_func add t0, tp, t0 - ld t3, (t0) - sd zero, (t0) + lx t3, (t0) + sx zero, (t0) jalr t3 /* Check special register values */ la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 - ld t2, 8(t0) // SP - ld t3, 16(t0) // GP - ld t4, 24(t0) // TP + lx t2, 1 * XSZ(t0) // SP + lx t3, 2 * XSZ(t0) // GP + lx t4, 3 * XSZ(t0) // TP li a1, 'S' bne t2, sp, .Lfail_xp li a1, 'G' @@ -181,27 +210,27 @@ func checkasm_checked_call_i bne t4, tp, .Lfail_xp /* Check value of saved registers */ - li t0, 0xdeadbeef1badf00d + li t1, REG_MAGIC .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 li a1, \n - bne t0, s\n, .Lfail_s + bne t1, s\n, .Lfail_s .endr 4: /* Restore RA and saved registers */ la.tls.ie t0, .Lsaved_xregs add t0, tp, t0 - ld ra, (t0) + lx ra, (t0) .irp n, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - ld s\n, (32 + (8 * \n))(t0) + lx s\n, (4 + \n) * XSZ(t0) .endr ret .Lfail_xp: # checkasm_fail_func() needs valid SP, GP and TP. Restore them. 
- ld sp, 8(t0) - ld gp, 16(t0) - ld tp, 24(t0) + lx sp, 1 * XSZ(t0) + lx gp, 2 * XSZ(t0) + lx tp, 3 * XSZ(t0) lla a0, .Lfail_rsvd_reg call checkasm_fail_func j 4b @@ -303,4 +332,3 @@ func checkasm_checked_call_ifv, zve32x csrwi vxsat, 1 /* Saturation: encountered */ jr t0 endfunc -#endif -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
