Ping!

Please review. 

Thanks & Regards
Kishan



On 13/11/25 12:20 am, Kishan Parmar wrote:
> Hi Segher and All,
>
> GCC currently emits three instructions (rldic/rldic/or) for RTL of the
> form:
>
>   (ior (and (ashift S1 sh1) mask1)
>        (and (ashift S2 sh2) mask2))
>
> even when the two masks select disjoint bitfields that can be merged
> using a single `rldic` (rotate-and-mask) followed by an `rldimi`
> (rotate-and-insert) instruction.
>
> This patch adds a `define_split` pattern that recognizes such cases and
> rewrites them into a two-instruction sequence that performs the same
> operation more efficiently:
>
>     rldic  dest, S2, sh2, mb2
>     rldimi dest, S1, sh1, mb1
>
> The splitter checks that:
>   * both rotated fields have valid insert/shift masks,
>   * the masks are non-overlapping, and
>   * safetty guard to check pseudos can be created at this stage
>     (`can_create_pseudo_p()`).
>
> This transformation allows combine to generate the higher-level
> (ior(and(rotate), and(rotate))) form, while the new splitter breaks it
> down pre-reload into efficient PowerPC field-merge instructions.
>
> Bootstrapped and tested on powerpc64le-linux and power/power64-linux with
> no regressions.
>
> 2025-11-13  Kishan Parmar  <[email protected]>
>
> gcc/ChangeLog:
>       PR target/93171
>       * config/rs6000/rs6000.md (New merge field splitter): New define_split
>       pattern. Split disjoint rotate-and-mask OR sequences into rldic + rldimi
>       when both masks are valid and non-overlapping.
>       * testsuite/gcc.target/powerpc/pr93171.c: New testcase.
> ---
>  gcc/config/rs6000/rs6000.md                | 71 ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/powerpc/pr93171.c | 29 +++++++++
>  2 files changed, 100 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr93171.c
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index ff085bf9bb1..492507297c0 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4410,6 +4410,77 @@
>    [(set_attr "type" "insert")
>     (set_attr "size" "64")])
>  
> +; Two forms for rotate-clear + rotate-insert (the two arms of the IOR are
> +; not canonicalized, both are an AND so are the same precedence).
> +(define_split
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +        (ior:GPR
> +         (and:GPR
> +          (match_operator:GPR 7 "rotate_mask_operator"
> +           [(match_operand:GPR 1 "gpc_reg_operand")
> +            (match_operand:SI 2 "const_int_operand")])
> +          (match_operand:GPR 3 "const_int_operand"))
> +         (and:GPR
> +          (match_operator:GPR 8 "rotate_mask_operator"
> +           [(match_operand:GPR 4 "gpc_reg_operand")
> +            (match_operand:SI 5 "const_int_operand")])
> +            (match_operand:GPR 6 "const_int_operand"))))]
> +  "rs6000_is_valid_insert_mask (operands[3], operands[7], <MODE>mode)
> +   && rs6000_is_valid_shift_mask (operands[6], operands[8], <MODE>mode)
> +   && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
> +   && can_create_pseudo_p ()"
> +  [
> +   (set (match_dup 9)
> +        (and:GPR (match_dup 8)
> +                 (match_dup 6)))
> +   (set (match_dup 0)
> +        (ior:GPR
> +          (and:GPR (match_dup 9)
> +                   (match_dup 10))
> +          (and:GPR (match_dup 7)
> +                   (match_dup 3))))
> +  ]
> +{
> +  HOST_WIDE_INT m1 = INTVAL (operands[3]);
> +  HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
> +  operands[9] = gen_reg_rtx (<MODE>mode);
> +  operands[10] = GEN_INT (cm);
> +})
> +
> +(define_split
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +        (ior:GPR
> +         (and:GPR
> +          (match_operator:GPR 7 "rotate_mask_operator"
> +           [(match_operand:GPR 1 "gpc_reg_operand")
> +            (match_operand:SI 2 "const_int_operand")])
> +          (match_operand:GPR 3 "const_int_operand"))
> +         (and:GPR
> +          (match_operator:GPR 8 "rotate_mask_operator"
> +           [(match_operand:GPR 4 "gpc_reg_operand")
> +            (match_operand:SI 5 "const_int_operand")])
> +            (match_operand:GPR 6 "const_int_operand"))))]
> +  "rs6000_is_valid_insert_mask (operands[6], operands[8], <MODE>mode)
> +   && rs6000_is_valid_shift_mask (operands[3], operands[7], <MODE>mode)
> +   && ((UINTVAL (operands[3]) & UINTVAL (operands[6])) == 0)
> +   && can_create_pseudo_p ()"
> +  [
> +   (set (match_dup 9)
> +        (and:GPR (match_dup 7)
> +                 (match_dup 3)))
> +   (set (match_dup 0)
> +        (ior:GPR
> +          (and:GPR (match_dup 9)
> +                   (match_dup 10))
> +          (and:GPR (match_dup 8)
> +                   (match_dup 6))))
> +  ]
> +{
> +  HOST_WIDE_INT m1 = INTVAL (operands[6]);
> +  HOST_WIDE_INT cm = trunc_int_for_mode (~m1, <MODE>mode);
> +  operands[9] = gen_reg_rtx (<MODE>mode);
> +  operands[10] = GEN_INT (cm);
> +})
>  
>  ; This handles the important case of multiple-precision shifts.  There is
>  ; no canonicalization rule for ASHIFT vs. LSHIFTRT, so two patterns.
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93171.c 
> b/gcc/testsuite/gcc.target/powerpc/pr93171.c
> new file mode 100644
> index 00000000000..3e60dc4f89c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93171.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile { target powerpc64*-*-* } } */
> +/* { dg-options "-O2 -m64 -mcpu=power10" } */
> +
> +typedef unsigned long long uint64_t;
> +
> +typedef union {
> +    uint64_t u64;
> +    struct {
> +        uint64_t op   : 4;
> +        uint64_t type : 3;
> +        uint64_t tag  : 32;
> +    } s;
> +} TagReq;
> +
> +__attribute__((noinline, noclone))
> +void foo(uint64_t *a, int type, int tag)
> +{
> +    TagReq req;
> +    req.u64 = 0;
> +
> +    req.s.tag  = (unsigned)tag;
> +    req.s.type = (unsigned)type;
> +
> +    *a = req.u64;
> +}
> +
> +/* Expect exactly one rldic and one rldimi.  */
> +/* { dg-final { scan-assembler-times {\mrldic(\.)?\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mrldimi(\.)?\M} 1 } } */

Reply via email to