https://gcc.gnu.org/g:06daaf8fad9e94a2c2399b1e7208c87a134711ed

commit r17-687-g06daaf8fad9e94a2c2399b1e7208c87a134711ed
Author: Philipp Tomsich <[email protected]>
Date:   Sat May 23 10:31:11 2026 -0600

    [RISC-V] Improve slli+zext+andi sequence for RISC-V
    
    So this is another patch mostly from the VRULL team.  Given something like
    this:
    
    > #define T int
    > typedef long unsigned int size_t;
    > extern void *xcalloc (size_t, size_t) ;
    > typedef struct sparseset_def
    > {
    >   unsigned T *dense;
    >   unsigned T *sparse;
    >   unsigned T members;
    >   unsigned T size;
    >   unsigned T iter;
    >   unsigned char iter_inc;
    >   unsigned char iterating;
    >   unsigned T elms[2];
    > } *sparseset;
    > sparseset
    > sparseset_alloc (unsigned T n_elms)
    > {
    >   unsigned T n_bytes = sizeof (struct sparseset_def)
    >     + ((n_elms - 1) * 2 * sizeof (unsigned T));
    >   sparseset set = (sparseset) xcalloc (1, n_bytes);
    >   return set;
    > }
    
    It currently compiles into this with rv64gcb:
    
    >         addi    a1,a0,4
    >         slli    a1,a1,3
    >         zext.w  a1,a1
    >         andi    a1,a1,-8
    >         li      a0,1
    >         tail    xcalloc
    But we can do better.  In particular the slli+zext+andi sequence can be
    improved into:
    
    >         addi    a1,a0,4
    >         slli    a5,a1,35
    >         srli    a1,a5,32
    >         li      a0,1
    >         tail    xcalloc
    
    The new pattern needs to be a define_insn_and_split due to a chain of
    define_insn_and_split patterns that start with mvconst_internal 🙁
    
    To avoid regressing zba-shadd.c I had to turn an existing define_split into
    a define_insn_and_split 🙁 🙁
    
    This has been regression tested on riscv32-elf and riscv64-elf.  It's been
    bootstrapped and regression tested on the K1 design (where it is likely
    triggered a few times during bootstrap) and on the Pioneer (which doesn't
    have Zba, so this pattern should never trigger).  Waiting on pre-commit
    testing's verdict.
    
    gcc/
    
            * config/riscv/bitmanip.md (slli_slli_uw): New pattern.
            (plus+and+ashift splitter): Turn into define_insn_and_split.
            (riscv_slli_uw): Renamed from *slliuw.
    
    gcc/testsuite
            * gcc.target/riscv/and-shift-1.c: New test.

Diff:
---
 gcc/config/riscv/bitmanip.md                 | 114 +++++++++++++++++++--------
 gcc/testsuite/gcc.target/riscv/and-shift-1.c |  35 ++++++++
 2 files changed, 118 insertions(+), 31 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index c1a106b50c90..980bc4acf587 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -123,6 +123,54 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "DI")])
 
+;; A shift-left, zext.w, shift-left sequence should turn into a
+;; shift-left followed by slli.uw.
+;; The "TARGET_ZBA && clz_hwi (operands[3]) <= 32" check in the
+;; "*zero_extendsidi2_shifted" pattern over in riscv.md ensures
+;; that we fall through to here, if appropriate.
+;;
+;; Due to the anonymous pattern which utilizes zext.w to clear
+;; the upper half of a 64bit register and avoid constant synthesis
+;; this must be a define_insn_and_split for now.
+;;
+(define_insn_and_split "*slli_slli_uw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+                          (match_operand:QI 2 "dimode_shift_operand" ""))
+               (match_operand:DI 3 "consecutive_bits_operand" "")))
+   (clobber (match_scratch:DI 4 "=&r"))]
+  "TARGET_64BIT && TARGET_ZBA
+   && popcount_hwi (INTVAL (operands[3])) < 32
+   && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+   && IN_RANGE (clz_hwi (INTVAL (operands[3])), 29, 32)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+  /* scale: shamt for the slli.uw */
+  int scale = 32 - clz_hwi (mask);
+  /* bias:  shamt for the prior shift (can be zero) */
+  int bias = ctz_hwi (mask) - scale;
+
+  /* Don't emit a zero count shift.  Nothing post-reload will clean
+     that up.  */
+  if (bias != 0)
+    emit_insn (gen_rtx_SET (operands[4],
+                           gen_rtx_ASHIFT (DImode, operands[1],
+                                           GEN_INT (bias))));
+
+  /* If BIAS was zero, then the source is still in operands[1], else
+     it's in the scratch register.  */
+  emit_insn (gen_riscv_slli_uw (operands[0],
+                               bias ? operands[4] : operands[1],
+                               GEN_INT (scale),
+                               GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale)));
+  DONE;
+}
+  [(set_attr "type" "bitmanip")])
+
+
 ;; During combine, we may encounter an attempt to combine
 ;;   slli rtmp, rs, #imm
 ;;   zext.w rtmp, rtmp
@@ -130,42 +178,46 @@
 ;; which will lead to the immediate not satisfying the above constraints.
 ;; By splitting the compound expression, we can simplify to a slli and a
 ;; sh[123]add.uw.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-       (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
-                                   (match_operand:QI 2 "immediate_operand"))
-                        (match_operand:DI 3 "consecutive_bits_operand"))
-                (match_operand:DI 4 "register_operand")))
-   (clobber (match_operand:DI 5 "register_operand"))]
-  "TARGET_64BIT && TARGET_ZBA"
+
+;; To match this target sequence, the final result must be shifted
+;; using the sh[123]add.uw instruction by 1, 2 or 3 bits into the high
+;; word.  To test for this property, we count the leading zero-bits of
+;; the mask (which must be in the range [29, 31]).
+
+(define_insn_and_split "*shift_then_shNadd.uw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+                                   (match_operand:QI 2 "dimode_shift_operand" ""))
+                        (match_operand:DI 3 "consecutive_bits_operand" ""))
+                (match_operand:DI 4 "register_operand" "r")))
+   (clobber (match_scratch:DI 5 "=&r"))]
+  "TARGET_64BIT && TARGET_ZBA
+   && riscv_shamt_matches_mask_p (UINTVAL (operands[2]), UINTVAL (operands[3]))
+   && IN_RANGE (clz_hwi (UINTVAL (operands[3])), 29, 31)"
+  "#"
+  "&& reload_completed"
   [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 6)))
    (set (match_dup 0) (plus:DI (and:DI (ashift:DI (match_dup 5)
                                                  (match_dup 7))
                                       (match_dup 8))
                               (match_dup 4)))]
 {
-       unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]);
-       /* scale: shift within the sh[123]add.uw */
-       unsigned HOST_WIDE_INT scale = 32 - clz_hwi (mask);
-       /* bias:  pre-scale amount (i.e. the prior shift amount) */
-       int bias = ctz_hwi (mask) - scale;
-
-       /* If the bias + scale don't add up to operand[2], reject. */
-       if ((scale + bias) != UINTVAL (operands[2]))
-          FAIL;
-
-       /* If the shift-amount is out-of-range for sh[123]add.uw, reject. */
-       if ((scale < 1) || (scale > 3))
-          FAIL;
-
-       /* If there's no bias, the '*shNadduw' pattern should have matched. */
-       if (bias == 0)
-          FAIL;
-
-       operands[6] = GEN_INT (bias);
-       operands[7] = GEN_INT (scale);
-       operands[8] = GEN_INT (0xffffffffULL << scale);
-})
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+  /* scale: shamt for the sh[123]add.uw */
+  unsigned HOST_WIDE_INT scale = 32 - clz_hwi (mask);
+  /* bias:  shamt for the prior shift */
+  unsigned HOST_WIDE_INT bias = ctz_hwi (mask) - scale;
+
+  /* If there's no bias, the '*shNadduw' pattern should have matched.  */
+  if (bias == 0)
+    FAIL;
+
+  operands[6] = GEN_INT (bias);
+  operands[7] = GEN_INT (scale);
+  operands[8] = GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale);
+}
+  [(set_attr "type" "bitmanip")])
+
 
 (define_insn "*add.uw"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -177,7 +229,7 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "DI")])
 
-(define_insn "*slliuw"
+(define_insn "riscv_slli_uw"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
                           (match_operand:QI 2 "immediate_operand" "I"))
diff --git a/gcc/testsuite/gcc.target/riscv/and-shift-1.c b/gcc/testsuite/gcc.target/riscv/and-shift-1.c
new file mode 100644
index 000000000000..f26da8892d90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/and-shift-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-DT=int -march=rv64gc_zba -mabi=lp64d -mbranch-cost=4" { target rv64 } } */
+/* { dg-additional-options "-DT=short -march=rv32gc_zba -mabi=ilp32 -mbranch-cost=4" { target rv32 } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+typedef long unsigned int size_t;
+extern void *xcalloc (size_t, size_t) ;
+typedef struct sparseset_def
+{
+  unsigned T *dense;
+  unsigned T *sparse;
+  unsigned T members;
+  unsigned T size;
+  unsigned T iter;
+  unsigned char iter_inc;
+  unsigned char iterating;
+  unsigned T elms[2];
+} *sparseset;
+sparseset
+sparseset_alloc (unsigned T n_elms)
+{
+  unsigned T n_bytes = sizeof (struct sparseset_def)
+    + ((n_elms - 1) * 2 * sizeof (unsigned T));
+  sparseset set = (sparseset) xcalloc (1, n_bytes);
+  return set;
+}
+
+
+
+
+/* { dg-final { scan-assembler "slli\t" } } */
+/* { dg-final { scan-assembler "srli\t" } } */
+/* { dg-final { scan-assembler-not "zext.w\t" } } */
+/* { dg-final { scan-assembler-not "andi\t" } } */
+

Reply via email to