Hi Segher and All,
GCC currently emits three instructions (rldic/rldic/or) for RTL of the
form:
(ior (and (ashift S1 sh1) mask1)
(and (ashift S2 sh2) mask2))
even when the two masks select disjoint bitfields that can be merged
using a single `rldic` (rotate-and-mask) followed by an `rldimi`
(rotate-and-insert) instruction.
This patch adds a `define_split` pattern that recognizes such cases and
rewrites them into a two-instruction sequence that performs the same
operation more efficiently:
rldic dest, S2, sh2, mb2
rldimi dest, S1, sh1, mb1
The splitter checks that:
* both rotated fields have valid insert/shift masks,
* the masks are non-overlapping, and
* safetty guard to check pseudos can be created at this stage
(`can_create_pseudo_p()`).
This transformation allows combine to generate the higher-level
(ior(and(rotate), and(rotate))) form, while the new splitter breaks it
down pre-reload into efficient PowerPC field-merge instructions.
Bootstrapped and tested on powerpc64le-linux and power/power64-linux with
no regressions.
2025-11-13 Kishan Parmar <[email protected]>
gcc/ChangeLog:
PR target/93171
* config/rs6000/rs6000.md (New merge field splitter): New define_split
pattern. Split disjoint rotate-and-mask OR sequences into rldic + rldimi
when both masks are valid and non-overlapping.
* testsuite/gcc.target/powerpc/pr93171.c: New testcase.
---
gcc/config/rs6000/rs6000.md | 71 ++++++++++++++++++++++
gcc/testsuite/gcc.target/powerpc/pr93171.c | 29 +++++++++
2 files changed, 100 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93171.c
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ff085bf9bb1..492507297c0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4410,6 +4410,77 @@
[(set_attr "type" "insert")
(set_attr "size" "64")])
+; Two forms for rotate-clear + rotate-insert (the two arms of the IOR are
+; not canonicalized, both are an AND so are the same precedence).
+(define_split
+ [(set (match_operand:GPR 0 "gpc_reg_operand")
+ (ior:GPR
+ (and:GPR
+ (match_operator:GPR 7 "rotate_mask_operator"
+ [(match_operand:GPR 1 "gpc_reg_operand")
+ (match_operand:SI 2 "const_int_operand")])
+ (match_operand:GPR 3 "const_int_operand"))
+ (and:GPR
+ (match_operator:GPR 8 "rotate_mask_operator"
+ [(match_operand:GPR 4 "gpc_reg_operand")
+ (match_operand:SI 5 "const_int_operand")])
+ (match_operand:GPR 6 "const_int_operand"))))]
+ "rs6000_is_valid_insert_mask (operands[3], operands[7], <MODE>mode)
+ && rs6000_is_valid_shift_mask (operands[6], operands[8], <MODE>mode)
+ && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
+ && can_create_pseudo_p ()"
+ [
+ (set (match_dup 9)
+ (and:GPR (match_dup 8)
+ (match_dup 6)))
+ (set (match_dup 0)
+ (ior:GPR
+ (and:GPR (match_dup 9)
+ (match_dup 10))
+ (and:GPR (match_dup 7)
+ (match_dup 3))))
+ ]
+{
+ HOST_WIDE_INT m1 = INTVAL (operands[3]);
+ HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
+ operands[9] = gen_reg_rtx (<MODE>mode);
+ operands[10] = GEN_INT (cm);
+})
+
+(define_split
+ [(set (match_operand:GPR 0 "gpc_reg_operand")
+ (ior:GPR
+ (and:GPR
+ (match_operator:GPR 7 "rotate_mask_operator"
+ [(match_operand:GPR 1 "gpc_reg_operand")
+ (match_operand:SI 2 "const_int_operand")])
+ (match_operand:GPR 3 "const_int_operand"))
+ (and:GPR
+ (match_operator:GPR 8 "rotate_mask_operator"
+ [(match_operand:GPR 4 "gpc_reg_operand")
+ (match_operand:SI 5 "const_int_operand")])
+ (match_operand:GPR 6 "const_int_operand"))))]
+ "rs6000_is_valid_insert_mask (operands[6], operands[8], <MODE>mode)
+ && rs6000_is_valid_shift_mask (operands[3], operands[7], <MODE>mode)
+ && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
+ && can_create_pseudo_p ()"
+ [
+ (set (match_dup 9)
+ (and:GPR (match_dup 7)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (ior:GPR
+ (and:GPR (match_dup 9)
+ (match_dup 10))
+ (and:GPR (match_dup 8)
+ (match_dup 6))))
+ ]
+{
+ HOST_WIDE_INT m1 = INTVAL (operands[6]);
+ HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
+ operands[9] = gen_reg_rtx (<MODE>mode);
+ operands[10] = GEN_INT (cm);
+})
; This handles the important case of multiple-precision shifts. There is
; no canonicalization rule for ASHIFT vs. LSHIFTRT, so two patterns.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93171.c
b/gcc/testsuite/gcc.target/powerpc/pr93171.c
new file mode 100644
index 00000000000..3e60dc4f89c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93171.c
@@ -0,0 +1,29 @@
+/* { dg-do compile { target powerpc64*-*-* } } */
+/* { dg-options "-O2 -m64 -mcpu=power10" } */
+
+typedef unsigned long long uint64_t;
+
+typedef union {
+ uint64_t u64;
+ struct {
+ uint64_t op : 4;
+ uint64_t type : 3;
+ uint64_t tag : 32;
+ } s;
+} TagReq;
+
+__attribute__((noinline, noclone))
+void foo(uint64_t *a, int type, int tag)
+{
+ TagReq req;
+ req.u64 = 0;
+
+ req.s.tag = (unsigned)tag;
+ req.s.type = (unsigned)type;
+
+ *a = req.u64;
+}
+
+/* Expect exactly one rldic and one rldimi. */
+/* { dg-final { scan-assembler-times {\mrldic(\.)?\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mrldimi(\.)?\M} 1 } } */
--
2.47.3