https://gcc.gnu.org/g:18cbdb748ddc3588feb6ad29073c57253a290fa5
commit 18cbdb748ddc3588feb6ad29073c57253a290fa5 Author: Jeff Law <[email protected]> Date: Mon Mar 9 08:19:33 2026 -0600 More improvements for 85234 Diff: --- gcc/config/riscv/bitmanip.md | 32 +++++++++ gcc/config/riscv/predicates.md | 8 +++ gcc/config/riscv/riscv.md | 119 +++++++++++++++++++++++++++++++ gcc/match.pd | 17 +++++ gcc/testsuite/gcc.target/riscv/pr85234.c | 43 +++++++++++ 5 files changed, 219 insertions(+) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 0d16d79df3ae..5a12f1ea763a 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1365,3 +1365,35 @@ (set (match_dup 0) (zero_extract:X (match_dup 3) (const_int 1) (zero_extend:X (match_dup 2))))]) + + +;; So the basic idea here is to realize that if we just want to test a +;; single bit in SImode, we can left shift the input by 32 additional +;; bit positions. That removes any "junk" in the high order bits. We +;; already needed to do a shift, so that's essentially free. We can +;; adjust the constant for free as well. +;; +;; To test if a register is equal to a constant with a single bit set +;; we can flip the bit and test against zero. 
+(define_insn_and_split "" + [(set (match_operand:DI 0 "register_operand" "=r") + (any_eq:DI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_scratch:DI 4 "=&r"))] + "(TARGET_64BIT + && TARGET_ZBS + && exact_log2 (UINTVAL (operands[3]) & 0xffffffff) >= 0)" + + "#" + "&& reload_completed" + [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 2))) + (set (match_dup 4) (xor:DI (match_dup 4) (match_dup 3))) + (set (match_dup 0) (any_eq:DI (match_dup 4) (const_int 0)))] +{ + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_int_mode (INTVAL (operands[2]) + 32, QImode); + operands[3] = gen_int_mode (UINTVAL (operands[3]) << 32, DImode); +} + [(set_attr "type" "arith")]) + diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 220a6f0830c7..73e6a72ff5be 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -265,6 +265,14 @@ return true; }) +;; If we invert every bit are we left with a constant that is 2^n - 1? +(define_predicate "inverted_p2m1_operand" + (match_code "const_int") +{ + int val = exact_log2 (~UINTVAL (op) + 1); + return val >= 0; +}) + (define_predicate "high_mask_shift_operand" (match_code "const_int") { diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 3fe0ad0ccdf4..075007a7488e 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -906,6 +906,28 @@ } [(set_attr "type" "arith")]) +;; The immediately preceding pattern can act as a bridge to this pattern which +;; is just a shNadd + seq/sne. I'd prefer this to be a simple define_split, +;; but with the pattern above being a define_insn_and_split, that forces this +;; one to be a define_insn_and_split as well for combine to work. 
+(define_insn_and_split "" + [(set (match_operand:X 0 "register_operand" "=r") + (any_eq:X (ashift:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "imm123_operand" "Ds3")) + (match_operand 3 "const_int_operand"))) + (clobber (match_scratch:X 4 "=&r"))] + "(TARGET_ZBA + && operands[3] != const0_rtx + && riscv_const_insns (operands[3], false))" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 4) (plus:X (ashift:X (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (any_eq:X (match_dup 4) (const_int 0)))] + { operands[3] = gen_int_mode (-UINTVAL (operands[3]), word_mode); } + [(set_attr "type" "arith")]) + ;; ;; .................... ;; @@ -4960,6 +4982,103 @@ { operands[3] = GEN_INT (BITS_PER_WORD - exact_log2 (INTVAL (operands[3]) + 1)); }) +;; The idea here is an equality test of a right shifted value is really +;; just a range test in certain circumstances. +;; +;; In those cases we can save an instruction by adjusting the constant +;; we compare against, and using a slt instruction. This relies on +;; mvconst_internal, so will need adjustment when that goes away. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_eq:X (ashiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(INTVAL (operands[3]) == 0 + || (INTVAL (operands[3]) + == (HOST_WIDE_INT) (HOST_WIDE_INT_M1U + << (BITS_PER_WORD - 1 - INTVAL (operands[2])))))" + [(const_int 0)] +{ + /* This loads up the constant. We're relying on mvconst_internal here. 
*/ + unsigned HOST_WIDE_INT val = UINTVAL (operands[3]); + val <<= UINTVAL (operands[2]); + val += (HOST_WIDE_INT_1U << UINTVAL (operands[2])); + operands[5] = gen_int_mode (val, word_mode); + if (<CODE> == NE) + val -= 1; + emit_insn (gen_rtx_SET (operands[4], operands[5])); + + /* If we are doing a range test for 0..2^n-1, then our code needs to be + unsigned. If we're doing a range test around the minimum negative + value for the mode, then the code is signed. */ + rtx_code code = operands[3] == CONST0_RTX (word_mode) ? LTU : LT; + + /* EQ/NE alters the order of the operands. */ + rtx rhs_op0 = operands[1]; + rtx rhs_op1 = operands[4]; + if (<CODE> == NE) + std::swap (rhs_op0, rhs_op1); + + rtx x = gen_rtx_fmt_ee (code, word_mode, rhs_op0, rhs_op1); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +;; Another form of the above. +;; It seems like we ought to be able to unify the conditions, at least +;; in concept, even if the precise code is not the same. +(define_split + [(set (match_operand:X 0 "register_operand") + (any_eq:X (and:X (match_operand:X 1 "register_operand") + (match_operand 2 "inverted_p2m1_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(INTVAL (operands[3]) == 0 + || INTVAL (operands[3]) == wi::min_value (GET_MODE_PRECISION (word_mode), + SIGNED))" + [(const_int 0)] +{ + unsigned HOST_WIDE_INT val = INTVAL (operands[3]) - INTVAL (operands[2]); + + if (<CODE> == NE) + val -= 1; + + /* This loads up the constant. We're relying on mvconst_internal here. */ + operands[5] = gen_int_mode (val, word_mode); + emit_insn (gen_rtx_SET (operands[4], operands[5])); + + /* If we are doing a range test for 0..2^n-1, then our code needs to be + unsigned. If we're doing a range test around the minimum negative + value for the mode, then the code is signed. */ + rtx_code code = operands[3] == CONST0_RTX (word_mode) ? LTU : LT; + + /* EQ/NE alters the order of the operands. 
*/ + rtx rhs_op0 = operands[1]; + rtx rhs_op1 = operands[4]; + if (<CODE> == NE) + std::swap (rhs_op0, rhs_op1); + + rtx x = gen_rtx_fmt_ee (code, word_mode, rhs_op0, rhs_op1); + emit_insn (gen_rtx_SET (operands[0], x)); + DONE; +}) + +;; So the basic idea here is to realize that after shifting we're just +;; flipping a single bit and the upper bits are just copies of the +;; flipped bit. +(define_split + [(set (match_operand:X 0 "register_operand") + (plus:X (lshiftrt:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand"))) + (clobber (match_operand:X 4 "register_operand"))] + "(TARGET_ZBS + && UINTVAL (operands[3]) == -(HOST_WIDE_INT_1U << (BITS_PER_WORD - INTVAL (operands[2]) - 1)))" + [(set (match_dup 4) (xor:X (match_dup 1) (match_dup 5))) + (set (match_dup 0) (ashiftrt:X (match_dup 4) (match_dup 2)))] + "{ operands[5] = gen_int_mode (HOST_WIDE_INT_1U << (BITS_PER_WORD - 1), word_mode); } ") + ;; Standard extensions and pattern for optimization (include "bitmanip.md") (include "crypto.md") diff --git a/gcc/match.pd b/gcc/match.pd index 7f16fd4e0814..5b18686672f4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -12261,3 +12261,20 @@ and, (simplify (BUILT_IN_CONSTANT_P (nop_convert@1 @0)) (BUILT_IN_CONSTANT_P @0)) + +(simplify + (ne (rshift (bit_not @0) INTEGER_CST@2) integer_zerop) + (with + { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + tree tem = const_binop (LSHIFT_EXPR, utype, build_one_cst (utype), @2); + } + (le:utype (convert:utype @0) (minus { build_all_ones_cst (utype); } { tem; })))) + +(simplify + (eq (bit_and (bit_xor @0 INTEGER_CST@3) INTEGER_CST@2) integer_zerop) + (with + { + tree utype = unsigned_type_for (TREE_TYPE (@0)); + } + (eq (lshift:utype (convert:utype @0) (CLZ:utype @2)) (lshift:utype @3 (CLZ:utype @2))))) diff --git a/gcc/testsuite/gcc.target/riscv/pr85234.c b/gcc/testsuite/gcc.target/riscv/pr85234.c new file mode 100644 index 000000000000..6b188419f6bc --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr85234.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv32gcbv_zicond -mabi=ilp32" { target rv32 } } */ +/* { dg-options "-O2 -march=rv64gcbv_zicond -mabi=lp64" { target rv64 } } */ + +#define N 3 +#define T int +#define cmp == +#define M 0xf0000000u +#define _Bool int +#define true 1 +#define false 0 + +_Bool f(T x, int t) { return (x << N) cmp (M << N); } + +_Bool f1(T x, int t) { return ((x^M) & (-1u>>N)) cmp 0; } + +_Bool f2(T x, int t) { return (x & (-1u>>N)) cmp (M & (-1u>>N)); } + +_Bool g(T x, int t) { return (x >> N) cmp M; } + +_Bool g2(T x, int t) { _Bool tttt = 0; if (tttt) return 0; return (x & (-1u<<N)) cmp (M << N); } + +/* Optimal code for rv32gcb is an li+sh3add+seq+ret for the first three tests + and li+addi+slt+ret for the final two tests. */ +/* { dg-final { scan-assembler-times "li\t" 5 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "sh3add\t" 3 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "seqz\t" 3 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "addi\t" 2 { target rv32 } } } */ +/* { dg-final { scan-assembler-times "slt\t" 2 { target rv32 } } } */ + +/* Optimal code for rv64gcb is the same for the first three tests, but + not achievable as we lose the fact that the upper 32 bits are don't + cares too early. binv+slliw+seq is still a good sequence though. + + We still get a bogus srai for f2 and we don't commonize the final + two tests. 
*/ +/* { dg-final { scan-assembler-times "binvi\t" 3 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "slli\t" 3 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "seqz\t" 5 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "\tli\t" 2 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "srai\t" 1 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "sub\t" 2 { target rv64 } } } */ +/* { dg-final { scan-assembler-times "andi\t" 1 { target rv64 } } } */
