https://gcc.gnu.org/g:baa8b724674018e568c571b2b610658d8dc0223d
commit r17-1060-gbaa8b724674018e568c571b2b610658d8dc0223d Author: Jeff Law <[email protected]> Date: Sun May 31 16:34:42 2026 -0600 [RISC-V][PR rtl-optimization/123313] Improve select between reg,-1 So this improves our ability to select across reg,-1. The early versions of this patch allowed const,-1, but those sequences weren't any better and occasionally ever-so-slightly worse, so those are rejected. I've spot checked spec2017 where it does show up, but it's not clear that it's showing up in any hot code. The basic idea is to use an sCC to generate 1,0, subtract 1 giving 0, -1. Then we can IOR that with the other input. Concretely: > int f(int a, int b, int c) > { > a = -1; > if (c < 10) a = b; > return a; > } Currently generates: > li a5,9 > addi a1,a1,1 > sgt a2,a2,a5 > czero.nez a2,a1,a2 > addi a0,a2,-1 > ret After this patch: > slti a0,a2,10 > addi a0,a0,-1 > or a0,a0,a1 > ret Probably the same performance on 4+ wide designs (and perhaps often on 2 wide designs). But it encodes a lot more efficiently, 18 bytes for the first sequence, just 10 bytes for the second. That can be important on some designs, particularly since if-converted blocks are more likely to be large and/or cross cache line boundaries. This has been bootstrapped and regression tested on x86 and riscv64. The riscv64 bootstraps were on the Pioneer, K1 (early version of the patch) and K3 (most recent versions). It's also been tested on all the *-elf platforms in my tester as well as additional bootstraps on platforms like alpha, sh4, etc. I'll wait for a final confirmation from the pre-commit tester before moving forward. PR rtl-optimization/123313 gcc/ * ifcvt.cc (noce_try_store_flag_logical): New function. (noce_process_if_block): Call it. gcc/testsuite/ * gcc.target/riscv/pr123313.c: New test. * gcc.target/riscv/pr124009.c: Adjust expected output. 
Diff: --- gcc/ifcvt.cc | 72 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/riscv/pr123313.c | 16 +++++++ gcc/testsuite/gcc.target/riscv/pr124009.c | 2 +- 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index c80a02b2255b..37e757461934 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -1778,6 +1778,75 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) return false; } +/* This is trying to capture cases like dest = cond ? a : -1. + + The basic idea is to use a store-flag insn to generate a -1/0 + value (directly or indirectly), then IOR that with the other + input. */ +static bool +noce_try_store_flag_logical (struct noce_if_info *if_info) +{ + rtx a = if_info->a; + rtx b = if_info->b; + rtx dest = if_info->x; + machine_mode mode = GET_MODE (dest); + + if (STORE_FLAG_VALUE != -1 && STORE_FLAG_VALUE != 1) + return false; + + if (!noce_simple_bbs (if_info)) + return false; + + bool swapped = false; + if (a == CONSTM1_RTX (GET_MODE (dest)) + && if_info->rev_cond) + { + std::swap (a, b); + swapped = true; + } + + if (b != CONSTM1_RTX (GET_MODE (dest))) + return false; + + /* If the other arm is not a REG/SUBREG, then punt. This is primarily to + let the target handle the constant case, which it can likely do better. + It also means we don't have to worry about non terminal expressions. */ + if (!REG_P (a) && !SUBREG_P (a)) + return false; + + /* At this point we've got dest = cond ? a : -1. Emit the store flag and + adjust its value (if necessary) to -1/0. */ + start_sequence (); + rtx temp = gen_reg_rtx (mode); + rtx target = noce_emit_store_flag (if_info, temp, !swapped, false); + if (!target) + { + end_sequence (); + return false; + } + + if (STORE_FLAG_VALUE == 1) + { + rtx x = gen_rtx_PLUS (mode, target, CONSTM1_RTX (mode)); + emit_move_insn (target, x); + } + + /* Now we've got -1/0 in TARGET. We can just IOR with A. 
*/ + rtx x = gen_rtx_IOR (mode, target, a); + emit_move_insn (dest, x); + + /* We've generated all the RTL, make sure it recognizes and is + profitable. */ + rtx_insn *seq = end_ifcvt_sequence (if_info); + if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info)) + return false; + + emit_insn_before_setloc (seq, if_info->jump, + INSN_LOCATION (if_info->insn_a)); + if_info->transform_name = "noce_try_store_flag_logical"; + return true; +} + /* Convert "if (test) foo++" into "foo += (test != 0)", and similarly for "foo--". */ @@ -4483,6 +4552,9 @@ noce_process_if_block (struct noce_if_info *if_info) goto success; if (noce_try_sign_bit_splat (if_info)) goto success; + if (!targetm.have_conditional_execution () + && noce_try_store_flag_logical (if_info)) + goto success; if (HAVE_conditional_move && noce_try_cmove (if_info)) goto success; diff --git a/gcc/testsuite/gcc.target/riscv/pr123313.c b/gcc/testsuite/gcc.target/riscv/pr123313.c new file mode 100644 index 000000000000..c50478c87bf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr123313.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcb_zicond -mabi=lp64d -mbranch-cost=4" { target rv64 } } */ +/* { dg-additional-options "-march=rv32gcb_zicond -mabi=ilp32 -mbranch-cost=4" { target rv32 } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */ + +#define TEST(TYPE, NAME) TYPE f##NAME(TYPE a, TYPE b, TYPE c) { a = -1; if (c < 10) a = b; return a; } + +TEST(signed char, sc) +TEST(signed short, ss) +TEST(signed int, si) +TEST(signed long, sl) + +/* { dg-final { scan-assembler-times "slti\t" 4 } } */ +/* { dg-final { scan-assembler-times "addi\t" 4 } } */ +/* { dg-final { scan-assembler-times "or\t" 4 } } */ +/* { dg-final { scan-assembler-not "czero" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr124009.c b/gcc/testsuite/gcc.target/riscv/pr124009.c index 6f541cacbb85..25a3bc93499b 100644 --- a/gcc/testsuite/gcc.target/riscv/pr124009.c +++ 
b/gcc/testsuite/gcc.target/riscv/pr124009.c @@ -5,7 +5,7 @@ int foo(int x, int y) { return (y < x) ? 1 : -1; } -/* { dg-final { scan-assembler-times {slli\t} 1 } } */ +/* { dg-final { scan-assembler-times {(slli|ori)\t} 1 } } */ /* { dg-final { scan-assembler-times {addi\t} 1 } } */ /* { dg-final { scan-assembler-not {czero.eqz\t} } } */
