Hi Segher and All,

GCC currently emits three instructions (rldic/rldic/or) for RTL of the
form:

  (ior (and (ashift S1 sh1) mask1)
       (and (ashift S2 sh2) mask2))

even when the two masks select disjoint bitfields that can be merged
using a single `rldic` (rotate-and-mask) followed by an `rldimi`
(rotate-and-insert) instruction.
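
For reference, the current code for such a merge looks roughly like this
(a sketch only; the actual registers, shift amounts and mask bounds depend
on the field layout):

    rldic  tmp1, S1, sh1, mb1
    rldic  tmp2, S2, sh2, mb2
    or     dest, tmp1, tmp2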

This patch adds a `define_split` pattern that recognizes such cases and
rewrites them into a two-instruction sequence that performs the same
operation more efficiently:

    rldic  dest, S2, sh2, mb2
    rldimi dest, S1, sh1, mb1

The splitter checks that:
  * one arm's mask is a valid insert mask and the other's is a valid
    shift mask,
  * the masks are non-overlapping, and
  * new pseudos can still be created at this point
    (`can_create_pseudo_p ()`).

This transformation lets combine form the higher-level
(ior (and (rotate ...) ...) (and (rotate ...) ...)) pattern, which the new
splitter then breaks down before reload into efficient PowerPC field-merge
instructions.
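
Concretely, the first of the two splitters rewrites the combined pattern
into two insns of roughly this shape (a sketch, not the exact RTL dump;
tmp is a fresh pseudo, mask1/mask2 are operands 3 and 6, and the rotates
stand for whatever rotate_mask_operator matched):

  (set tmp  (and (rotate S2 sh2) mask2))            ; becomes rldic
  (set dest (ior (and tmp ~mask1)
                 (and (rotate S1 sh1) mask1)))      ; becomes rldimi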

Bootstrapped and tested on powerpc64le-linux and powerpc64-linux with no
regressions.

2025-11-13  Kishan Parmar  <[email protected]>

gcc/ChangeLog:
        PR target/93171
        * config/rs6000/rs6000.md: New define_split patterns.  Split
        disjoint rotate-and-mask OR sequences into rldic + rldimi when
        both masks are valid and non-overlapping.

gcc/testsuite/ChangeLog:
        PR target/93171
        * gcc.target/powerpc/pr93171.c: New test.
---
 gcc/config/rs6000/rs6000.md                | 71 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr93171.c | 29 +++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93171.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ff085bf9bb1..492507297c0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4410,6 +4410,77 @@
   [(set_attr "type" "insert")
    (set_attr "size" "64")])
 
+; Two forms for rotate-clear + rotate-insert: the two arms of the IOR are
+; not canonicalized (both arms are ANDs, so neither one has precedence).
+(define_split
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+        (ior:GPR
+         (and:GPR
+          (match_operator:GPR 7 "rotate_mask_operator"
+           [(match_operand:GPR 1 "gpc_reg_operand")
+            (match_operand:SI 2 "const_int_operand")])
+          (match_operand:GPR 3 "const_int_operand"))
+         (and:GPR
+          (match_operator:GPR 8 "rotate_mask_operator"
+           [(match_operand:GPR 4 "gpc_reg_operand")
+            (match_operand:SI 5 "const_int_operand")])
+          (match_operand:GPR 6 "const_int_operand"))))]
+  "rs6000_is_valid_insert_mask (operands[3], operands[7], <MODE>mode)
+   && rs6000_is_valid_shift_mask (operands[6], operands[8], <MODE>mode)
+   && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
+   && can_create_pseudo_p ()"
+  [
+   (set (match_dup 9)
+        (and:GPR (match_dup 8)
+                 (match_dup 6)))
+   (set (match_dup 0)
+        (ior:GPR
+          (and:GPR (match_dup 9)
+                   (match_dup 10))
+          (and:GPR (match_dup 7)
+                   (match_dup 3))))
+  ]
+{
+  HOST_WIDE_INT m1 = INTVAL (operands[3]);
+  HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
+  operands[9] = gen_reg_rtx (<MODE>mode);
+  operands[10] = GEN_INT (cm);
+})
+
+(define_split
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+        (ior:GPR
+         (and:GPR
+          (match_operator:GPR 7 "rotate_mask_operator"
+           [(match_operand:GPR 1 "gpc_reg_operand")
+            (match_operand:SI 2 "const_int_operand")])
+          (match_operand:GPR 3 "const_int_operand"))
+         (and:GPR
+          (match_operator:GPR 8 "rotate_mask_operator"
+           [(match_operand:GPR 4 "gpc_reg_operand")
+            (match_operand:SI 5 "const_int_operand")])
+          (match_operand:GPR 6 "const_int_operand"))))]
+  "rs6000_is_valid_insert_mask (operands[6], operands[8], <MODE>mode)
+   && rs6000_is_valid_shift_mask (operands[3], operands[7], <MODE>mode)
+   && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
+   && can_create_pseudo_p ()"
+  [
+   (set (match_dup 9)
+        (and:GPR (match_dup 7)
+                 (match_dup 3)))
+   (set (match_dup 0)
+        (ior:GPR
+          (and:GPR (match_dup 9)
+                   (match_dup 10))
+          (and:GPR (match_dup 8)
+                   (match_dup 6))))
+  ]
+{
+  HOST_WIDE_INT m1 = INTVAL (operands[6]);
+  HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
+  operands[9] = gen_reg_rtx (<MODE>mode);
+  operands[10] = GEN_INT (cm);
+})
 
 ; This handles the important case of multiple-precision shifts.  There is
 ; no canonicalization rule for ASHIFT vs. LSHIFTRT, so two patterns.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93171.c b/gcc/testsuite/gcc.target/powerpc/pr93171.c
new file mode 100644
index 00000000000..3e60dc4f89c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93171.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10" } */
+
+typedef unsigned long long uint64_t;
+
+typedef union {
+    uint64_t u64;
+    struct {
+        uint64_t op   : 4;
+        uint64_t type : 3;
+        uint64_t tag  : 32;
+    } s;
+} TagReq;
+
+__attribute__((noinline, noclone))
+void foo(uint64_t *a, int type, int tag)
+{
+    TagReq req;
+    req.u64 = 0;
+
+    req.s.tag  = (unsigned)tag;
+    req.s.type = (unsigned)type;
+
+    *a = req.u64;
+}
+
+/* Expect exactly one rldic and one rldimi.  */
+/* { dg-final { scan-assembler-times {\mrldic(\.)?\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mrldimi(\.)?\M} 1 } } */
-- 
2.47.3
