https://gcc.gnu.org/g:18cbdb748ddc3588feb6ad29073c57253a290fa5
commit 18cbdb748ddc3588feb6ad29073c57253a290fa5 Author: Jeff Law <[email protected]> Date: Mon Mar 9 08:19:33 2026 -0600 More improvements for 85234 Diff: --- gcc/config/riscv/bitmanip.md | 32 +++++++++ gcc/config/riscv/predicates.md | 8 +++ gcc/config/riscv/riscv.md | 119 +++++++++++++++++++++++++++++++ gcc/match.pd | 17 +++++ gcc/testsuite/gcc.target/riscv/pr85234.c | 43 +++++++++++ 5 files changed, 219 insertions(+) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 0d16d79df3ae..5a12f1ea763a 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1365,3 +1365,35 @@ (set (match_dup 0) (zero_extract:X (match_dup 3) (const_int 1) (zero_extend:X (match_dup 2))))]) + + +;; So the basic idea here is to realize that if we just want to test a +;; single bit in SImode, we can left shift the input by 32 additional +;; bit positions. That removes any "junk" in the high order bits. We +;; already needed to do a shift, so that's essentially free. We can +;; adjust the constant for free as well. +;; +;; To test if a register is equal to a constant with a single bit set +;; we can flip the bit and test against zero. 
+(define_insn_and_split "" + [(set (match_operand:DI 0 "register_operand" "=r") + (any_eq:DI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_scratch:DI 4 "=&r"))] + "(TARGET_64BIT + && TARGET_ZBS + && exact_log2 (UINTVAL (operands[3]) & 0xffffffff) >= 0)" + + "#" + "&& reload_completed" + [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 2))) + (set (match_dup 4) (xor:DI (match_dup 4) (match_dup 3))) + (set (match_dup 0) (any_eq:DI (match_dup 4) (const_int 0)))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_int_mode (INTVAL (operands[2]) + 32, QImode); + operands[3] = gen_int_mode (UINTVAL (operands[3]) << 32, DImode); +} + [(set_attr "type" "arith")]) + diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 220a6f0830c7..73e6a72ff5be 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -265,6 +265,14 @@ return true; }) +;; If we invert every bit are we left with a constant that is 2^n - 1? +(define_predicate "inverted_p2m1_operand" + (match_code "const_int") +{ + int val = exact_log2 (~UINTVAL (op) + 1); + return val >= 0; +}) + (define_predicate "high_mask_shift_operand" (match_code "const_int") { diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 3fe0ad0ccdf4..075007a7488e 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -906,6 +906,28 @@ } [(set_attr "type" "arith")]) +;; The immediately preceding pattern can act as a bridge to this pattern which +;; is just a shNadd + seq/sne. I'd prefer this to be a simple define_split, +;; but with the pattern above being a define_insn_and_split, that forces this +;; one to be a define_insn_and_split as well for combine to work. 
+(define_insn_and_split "" + [(set (match_operand:X 0 "register_operand" "=r") + (any_eq:X (ashift:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "imm123_operand" "Ds3")) + (match_operand 3 "const_int_operand"))) + (clobber (match_scratch:X 4 "=&r"))] + "(TARGET_ZBA + && operands[3] != const0_rtx + && riscv_const_insns (operands[3], false))" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 4) (plus:X (ashift:X (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (any_eq:X (match_dup 4) (const_int 0)))] + { operands[3] = gen_int_mode (-UINTVAL (operands[3]), word_mode); } + [(set_attr "type" "arith")]) + ;; ;; .................... ;; @@ -4960,6 +4982,103 @@ { operands[3] = GEN_INT (BITS_PER_WORD - exact_log2 (INTVAL (operands[3]) + 1)); }) +;; The idea here is an equality test of a right shifted value is really +;; just a range test in certain circumstances. +;; +;; In those cases we can save an instruction by adjusting the constant +;; we compare against, and using a slt instruction. This relies on +;; mvconst_internal, so will need adjustment when that goes away. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_eq:X (ashiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(INTVAL (operands[3]) == 0 + || (INTVAL (operands[3]) + == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U + << (BITS_PER_WORD - 1 - INTVAL (operands[2])))))" + [(const_int 0)] +{ + /* This loads up the constant. We're relying on mvconst_internal here. 
*/ + unsigned HOST_WIDE_INT val = UINTVAL (operands[3]); + val <<= UINTVAL (operands[2]); + val += (HOST_WIDE_INT_1U << UINTVAL (operands[2])); + operands[5] = gen_int_mode (val, word_mode); + if (<CODE> == NE) + val -= 1; + emit_insn (gen_rtx_SET (operands[4], operands[5])); + + /* If we are doing a range test for 0..2^n-1, then our code needs to be + unsigned. If we're doing a range test around the minimum negative + value for the mode, then the code is signed. */ + rtx_code code = operands[3] == CONST0_RTX (word_mode) ? LTU : LT; + + /* EQ/NE alters the order of the operands. */ + rtx rhs_op0 = operands[1]; + rtx rhs_op1 = operands[4]; + if (<CODE> == NE) + std::swap (rhs_op0, rhs_op1); + + rtx x = gen_rtx_fmt_ee (code, word_mode, rhs_op0, rhs_op1); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +;; Another form of the above. +;; It seems like we ought to be able to unify the conditions, at least +;; in concept, even if the precise code is not the same. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_eq:X (and:X (match_operand:X 1 "register_operand") + (match_operand 2 "inverted_p2m1_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(INTVAL (operands[3]) == 0 + || INTVAL (operands[3]) == wi::min_value (GET_MODE_PRECISION (word_mode), + SIGNED))" + [(const_int 0)] +{ + unsigned HOST_WIDE_INT val = INTVAL (operands[3]) - INTVAL (operands[2]); + + if (<CODE> == NE) + val -= 1; + + /* This loads up the constant. We're relying on mvconst_internal here. */ + operands[5] = gen_int_mode (val, word_mode); + emit_insn (gen_rtx_SET (operands[4], operands[5])); + + /* If we are doing a range test for 0..2^n-1, then our code needs to be + unsigned. If we're doing a range test around the minimum negative + value for the mode, then the code is signed. */ + rtx_code code = operands[3] == CONST0_RTX (word_mode) ? LTU : LT; + + /* EQ/NE alters the order of the operands. 
*/ + rtx rhs_op0 = operands[1]; + rtx rhs_op1 = operands[4]; + if (<CODE> == NE) + std::swap (rhs_op0, rhs_op1); + + rtx x = gen_rtx_fmt_ee (code, word_mode, rhs_op0, rhs_op1); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +;; So the basic idea here is to realize that after shifting we're just +;; flipping a single bit and the upper bits are just copies of the +;; flipped bit. +(define_split + [(set (match_operand:X 0 "register_operand") + (plus:X (lshiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(TARGET_ZBS + && UINTVAL (operands[3]) == -(HOST_WIDE_INT_1U << (BITS_PER_WORD - INTVAL (operands[2]) - 1)))" + [(set (match_dup 4) (xor:X (match_dup 1) (match_dup 5))) + (set (match_dup 0) (ashiftrt:X (match_dup 4) (match_dup 2)))] + "{ operands[5] = gen_int_mode (HOST_WIDE_INT_1U << (BITS_PER_WORD - 1), word_mode); } ") + ;; Standard extensions and pattern for optimization (include "bitmanip.md") (include "crypto.md") diff --git a/gcc/match.pd b/gcc/match.pd index 7f16fd4e0814..5b18686672f4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -12261,3 +12261,20 @@ and, (simplify (BUILT_IN_CONSTANT_P (nop_convert@1 @0)) (BUILT_IN_CONSTANT_P @0)) + +(simplify + (ne (rshift (bit_not @0) INTEGER_CST@2) integer_zerop) + (with + { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + tree tem = const_binop (LSHIFT_EXPR, utype, build_one_cst (utype), @2); + } + (le:utype (convert:utype @0) (minus { build_all_ones_cst (utype); } { tem; })))) + +(simplify + (eq (bit_and (bit_xor @0 INTEGER_CST@3) INTEGER_CST@2) integer_zerop) + (with + { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + } + (eq (lshift:utype (convert:utype @0) (CLZ:utype @2)) (lshift:utype @3 (CLZ:utype @2))))) diff --git a/gcc/testsuite/gcc.target/riscv/pr85234.c b/gcc/testsuite/gcc.target/riscv/pr85234.c new file mode 100644 index 000000000000..6b188419f6bc --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr85234.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32gcbv_zicond -mabi=ilp32" { target rv32 } } */ +/* { dg-options "-O2 -march=rv64gcbv_zicond -mabi=lp64" { target rv64 } } */ + +#define N 3 +#define T int +#define cmp == +#define M 0xf0000000u +#define _Bool int +#define true 1 +#define false 0 + +_Bool f(T x, int t) { return (x << N) cmp (M << N); } + +_Bool f1(T x, int t) { return ((x^M) & (-1u>>N)) cmp 0; } + +_Bool f2(T x, int t) { return (x & (-1u>>N)) cmp (M & (-1u>>N)); } + +_Bool g(T x, int t) { return (x >> N) cmp M; } + +_Bool g2(T x, int t) { _Bool tttt = 0; if (tttt) return 0; return (x & (-1u<<N)) cmp (M << N); } + +/* Optimal code for rv32gcb is an li+sh3add+seq+ret for the first three tests + and li+addi+slt+ret for the final two tests. */ +/* { dg-final { scan-assembler-times "li\t" 5 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "sh3add\t" 3 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "seqz\t" 3 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "addi\t" 2 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "slt\t" 2 { target rv32 } } } */ + +/* Optimal code for rv64gcb is the same for the first three tests, but + not achievable as we lose the fact that the upper 32 bits are don't + cares too early. binv+slliw+seq is still a good sequence though. + + We still get a bogus srai for f2 and we don't commonize the final + two tests. 
*/ +/* { dg-final { scan-assembler-times "binvi\t" 3 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "slli\t" 3 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "seqz\t" 5 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "\tli\t" 2 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "srai\t" 1 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "sub\t" 2 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "andi\t" 1 { target rv64 } } } */
