https://gcc.gnu.org/g:ffd580462aa6fc2a86fa4961296ef240b60e1864

commit r17-1984-gffd580462aa6fc2a86fa4961296ef240b60e1864
Author: Jeff Law <[email protected]>
Date:   Mon Jun 29 14:51:24 2026 -0600

    [RISC-V] Improve logical and with some constants where high 32 bits in mask 
are clear
    
    I was playing around with our logical sequences on Friday spurred by a case
    that showed up in a BZ.  In that effort I stumbled over a second class of 
cases
    that's pretty easy to handle.
    
    In general if we need to do a logical AND where the mask is just a series 
of on
    bits in the middle of a word, then that is at worst a 3 instruction 
sequence.
    Logical shift right to clear some number of low bits, logical shift left to
    clear upper bits, logical shift right to put everything into its final
    position.  We already support this.
    
    srliw is an interesting instruction in this space because it can clear the
    upper 32 bits and some number of low bits at the same time.  So let's take 
a &
    0x00000000ffff0000.
    
    We could shift "a" right by 16, left by 48, then right again by 32.  But
    using srliw is better.  We just srliw by 16 bits to clear the upper 32 bits
    as well as the low 16 bits.  Then slli to put the bits into their final
    position.  This works for any case where the upper 32 bits are clear and
    there's a run of 1s in the low 32 bits ending at bit #31.
    
    When this applies we avoid synthesizing the constant and thus trivially
    reduce our reliance on mvconst_internal to help clean things up.  This did
    require tightening up an unnamed define_insn_and_split which tried to use
    zext.[hw] to do bulk clearing of upper bits.  We just want it to avoid
    matching for cases where the upper 32 bits are clear and we have a run of
    1s ending at bit 31 in the mask.
    
    Bootstrapped and regression tested on the c920 and K3 as well as regression
    tested on riscv64-elf and riscv32-elf.  It's worth noting this sequence 
doesn't
    require any special extension support, so it has the potential to trigger on
    the c920.
    
    Waiting on pre-commit before moving forward.
    
    gcc/
            * config/riscv/riscv.cc (synthesize_and): Use srliw to handle
            clearing both upper and lower bits in some cases.
            * config/riscv/bitmanip.md (ZBA splitter using zext.w): Do not
            use in cases where we can use srliw to clear those upper bits.
    
    gcc/testsuite
            * gcc.target/riscv/and-synthesis-1.c: New test.

Diff:
---
 gcc/config/riscv/bitmanip.md                     | 14 ++++++--
 gcc/config/riscv/riscv.cc                        | 26 +++++++++++++++
 gcc/testsuite/gcc.target/riscv/and-synthesis-1.c | 42 ++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index fd0f85d10f39..992e949a0990 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1177,8 +1177,14 @@
 ;; If we have the ZBA extension, then we can clear the upper half of a 64
 ;; bit object with a zext.w.  So if we have AND where the constant would
 ;; require synthesis of two or more instructions, but 32->64 sign extension
-;; of the constant is a simm12, then we can use zext.w+andi.  If the adjusted
-;; constant is a single bit constant, then we can use zext.w+bclri
+;; of the constant is a simm12, then we can use zext.w+andi.
+;;
+;; If the adjusted constant is a single bit constant, then we can use
+;; zext.w+bclri
+;;
+;; If the uppermost set bit of the original constant is bit 31 and the
+;; constant is a consecutive run of bits, leave the original form alone
+;; since it compresses better as a srliw+slli
 ;;
 ;; With the mvconst_internal pattern claiming a single insn to synthesize
 ;; constants, this must be a define_insn_and_split.
@@ -1197,7 +1203,9 @@
       implement with andi or bclri.  */
    && ((SMALL_OPERAND (sext_hwi (INTVAL (operands[2]), 32))
         || (TARGET_ZBS && popcount_hwi (INTVAL (operands[2])) == 31))
-       && INTVAL (operands[2]) != 0x7fffffff)"
+       && INTVAL (operands[2]) != 0x7fffffff)
+   && !(clz_hwi (UINTVAL (operands[2])) == 32
+        && consecutive_bits_operand (operands[2], word_mode))"
   "#"
   "&& 1"
   [(set (match_dup 0) (zero_extend:DI (match_dup 3)))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7806c27ee73b..dda17887019d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -15951,6 +15951,32 @@ synthesize_and (rtx operands[3])
       return true;
     }
 
+  /* For RV64 we can exploit srliw to mask off bits on both the
+     high and low ends, then shift it back into position.  So
+     a two instruction sequence.  */
+  t = UINTVAL (operands[2]);
+  if (TARGET_64BIT
+      && consecutive_bits_operand (operands[2], word_mode)
+      && budget >= 2
+      && clz_hwi (t) == 32)
+    {
+      /* The srliw will wipe the upper 32 bits and low bits at the
+        same time.  */
+      rtx x = gen_rtx_LSHIFTRT (SImode,
+                               gen_lowpart (SImode, operands[1]),
+                               GEN_INT (ctz_hwi (t)));
+      x = gen_rtx_SIGN_EXTEND (DImode, x);
+      output = gen_reg_rtx (word_mode);
+      emit_insn (gen_rtx_SET (output, x));
+      input = output;
+
+      /* Now shift it back to its proper position.  */
+      x = gen_rtx_ASHIFT (DImode, input, GEN_INT (ctz_hwi (t)));
+      emit_insn (gen_rtx_SET (operands[0], x));
+      return true;
+    }
+
+
   /* If we shift right to eliminate the trailing zeros and
      the result is a SMALL_OPERAND, then it's a shift right,
      andi and shift left.  */
diff --git a/gcc/testsuite/gcc.target/riscv/and-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/and-synthesis-1.c
new file mode 100644
index 000000000000..5bdeef607845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/and-synthesis-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile { target rv64 } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" } */
+
+#define T(L,S) unsigned long t_##L##_##S(unsigned long x) { return x & ((((1UL << L) - 1) << S)); }
+
+T(1, 31)
+T(2, 30)
+T(3, 29)
+T(4, 28)
+T(5, 27)
+T(6, 26)
+T(7, 25)
+T(8, 24)
+T(9, 23)
+T(10, 22)
+T(11, 21)
+T(12, 20)
+T(13, 19)
+T(14, 18)
+T(15, 17)
+T(16, 16)
+T(17, 15)
+T(18, 14)
+T(19, 13)
+T(20, 12)
+T(21, 11)
+T(22, 10)
+T(23, 9)
+T(24, 8)
+T(25, 7)
+T(26, 6)
+T(27, 5)
+T(28, 4)
+T(29, 3)
+T(30, 2)
+T(31, 1)
+
+/* { dg-final { scan-assembler-times "\\tsrliw" 30 } } */
+/* { dg-final { scan-assembler-times "\\tslli" 30 } } */
+/* { dg-final { scan-assembler-times "\\tbseti" 1 } } */
+/* { dg-final { scan-assembler-times "\\tand" 1 } } */
+

Reply via email to