https://gcc.gnu.org/g:409ea888f73b2d4ae17686b28d33ca4634dafcfb
commit r16-1197-g409ea888f73b2d4ae17686b28d33ca4634dafcfb
Author: Jeff Law <j...@ventanamicro.com>
Date: Thu Jun 5 16:58:45 2025 -0600

[RISC-V] Improve signed division by 2^n

So another class of cases where we can do better than a zicond sequence. Like the prior patch this came up evaluating some code from Shreya to detect more conditional move cases.

This patch allows us to use the "splat the sign bit" idiom to efficiently select between 0 and 2^n-1. That's particularly important for signed division by a power of two.

For signed division by a power of 2, you conditionally add 2^n-1 to the numerator, then right shift that result. Using zicond somewhat naively you get something like this (for n / 4096):

> li a5,4096
> addi a5,a5,-1
> slti a4,a0,0
> add a5,a0,a5
> czero.eqz a5,a5,a4
> czero.nez a0,a0,a4
> add a0,a0,a5
> srai a0,a0,12

After this patch you get this instead:

> srai a5,a0,63
> srli a5,a5,52
> add a0,a5,a0
> srai a0,a0,12

It's not *that* much faster, but it's certainly shorter.

So the trick here is that after splatting the sign bit we have 0, -1. So a subsequent logical shift right would generate 0 or 2^n-1.

Yes, there is a nice variety of other constant pairs we can select between. Some notes have been added to the PR I opened yesterday.

The first thing we need to do is throttle back zicond generation. Unfortunately we don't see the constants from the division-by-2^n algorithm, so we have to disable for all lt/ge 0 cases. This can have small negative impacts. I looked at this across spec and didn't see anything I was particularly worried about and numerous small improvements from that alone.

With that in place we need to recognize the form seen by combine. Essentially it sees the splat of the sign bit feeding a logical AND. We split that into two right shifts.

This has survived in my tester. Waiting on upstream pre-commit before moving forward.

gcc/
* config/riscv/riscv.cc (riscv_expand_conditional_move): Avoid zicond in some cases involving sign bit tests.
* config/riscv/riscv.md: Split a splat of the sign bit feeding a masking off high bits into a pair of right shifts. gcc/testsuite * gcc.target/riscv/nozicond-3.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 34 +++++++++++++++++++++++++++++ gcc/config/riscv/riscv.md | 18 +++++++++++++++ gcc/testsuite/gcc.target/riscv/nozicond-3.c | 11 ++++++++++ 3 files changed, 63 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 3254ec9f9e13..413eae05f4c9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5393,6 +5393,40 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt) rtx op0 = XEXP (op, 0); rtx op1 = XEXP (op, 1); + /* For some tests, we can easily construct a 0, -1 value + which can then be used to synthesize more efficient + sequences that don't use zicond. */ + if ((code == LT || code == GE) + && (REG_P (op0) || SUBREG_P (op0)) + && op1 == CONST0_RTX (GET_MODE (op0))) + { + /* The code to expand signed division by a power of 2 uses a + conditional add by 2^n-1 idiom. It can be more efficiently + synthesized without zicond using srai+srli+add. + + But we don't see the constants here. Just a conditional move + with registers as the true/false values. So this is a little + over-aggressive and can result in a few missed if-conversions. */ + if ((REG_P (cons) || SUBREG_P (cons)) + && (REG_P (alt) || SUBREG_P (alt))) + return false; + + /* If one value is a nonzero constant and the other value is + not a constant, then avoid zicond as more efficient sequences + using the splatted sign bit are often possible. */ + if (CONST_INT_P (alt) + && alt != CONST0_RTX (mode) + && !CONST_INT_P (cons)) + return false; + + if (CONST_INT_P (cons) + && cons != CONST0_RTX (mode) + && !CONST_INT_P (alt)) + return false; + + /* If we need more special cases, add them here. 
*/ + } + if (((TARGET_ZICOND_LIKE || (arith_operand (cons, mode) && arith_operand (alt, mode))) && (GET_MODE_CLASS (mode) == MODE_INT)) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 92fe7c7741a2..6d3c80a04c74 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4834,6 +4834,24 @@ [(set_attr "type" "move") (set_attr "mode" "<MODE>")]) +;; If we're trying to create 0 or 2^n-1 based on the result of +;; a test such as (lt (reg) (const_int 0)), we'll see a splat of +;; the sign bit across a GPR using srai, then a logical and to +;; mask off high bits. We can replace the logical and with +;; a logical right shift which works without constant synthesis +;; for larger constants. +(define_split + [(set (match_operand:X 0 "register_operand") + (and:X (ashiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand")))] + "(INTVAL (operands[2]) == BITS_PER_WORD - 1 + && exact_log2 (INTVAL (operands[3]) + 1) >= 0)" + [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:X (match_dup 0) (match_dup 3)))] + { operands[3] = GEN_INT (BITS_PER_WORD + - exact_log2 (INTVAL (operands[3]) + 1)); }) + (include "bitmanip.md") (include "crypto.md") (include "sync.md") diff --git a/gcc/testsuite/gcc.target/riscv/nozicond-3.c b/gcc/testsuite/gcc.target/riscv/nozicond-3.c new file mode 100644 index 000000000000..5116742bc3e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/nozicond-3.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-additional-options "-march=rv64gc_zicond -mabi=lp64d -mbranch-cost=4" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */ + +long foo1 (long n) { return n / 4096; } + +/* { dg-final { scan-assembler-times {srai\t} 2 } } */ +/* { dg-final { scan-assembler-times {srli\t} 1 } } */ +/* { dg-final { scan-assembler-times {add\t} 1 } } */ +/* { dg-final { scan-assembler-not 
{czero} } } */ +