https://gcc.gnu.org/g:e4b033e232b29786d3a8a74ee4e4d93dd6debe01

commit e4b033e232b29786d3a8a74ee4e4d93dd6debe01
Author: Shreya Munnangi <[email protected]>
Date:   Thu Jan 8 21:29:38 2026 -0700

    [PR target/121778] Improving rotation detection
    
    In this PR we're getting code like this out of the gimple optimizers:
    
    >   _1 = a_4(D) << 63;
    >   _2 = a_4(D) >> 1;
    >   _3 = _2 ^ 1;
    >   _5 = _1 | _3;
    
    Note the XOR in that sequence.  It spoils our ability to recognize the
    rotation.  As a result we get code like this for rv64gcb:
    
    >         srli    a5,a0,1
    >         xori    a5,a5,1
    >         slli    a0,a0,63
    >         or      a0,a5,a0
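
    For reference, the gimple above corresponds to source along the lines of
    the new testcase; a minimal sketch for rv64 (the function name here is
    purely illustrative) is:

>   unsigned long f (unsigned long a)
>   {
>     return (a << 63) | ((a >> 1) ^ 1);
>   }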
    
    We can reassociate the operations when the XOR only flips bits resulting
    from the right or left shift, but not both.  So after reassociation in
    gimple we get:
    
    >   _1 = a_2(D) r>> 1;
    >   _3 = _1 ^ 1;
    
    Which results in:
    
    >         rori    a0,a0,1
    >         xori    a0,a0,1
    
    We don't bother with the transformation when the XOR is flipping a bit known
    to be zero (i.e., a high bit of the result of the right shift or a low bit
    of the result of the left shift).  For those cases we already figure out
    that the XOR is just an IOR and the right things already "just happen".
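
    A purely illustrative example of such a skipped case (for a 64-bit unsigned
    a, not taken from the testcase) would be something like:

>   x = (a << 63) | ((a >> 1) ^ (1UL << 63));

    where the flipped bit of the right-shift result is already known to be
    zero, so the XOR is effectively an IOR and the rotate is recognized without
    this change.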
    
    This triggered some code generation changes on the SH (not surprising
    because this BZ was derived from an older SH BZ).  It doesn't seem to
    significantly improve the SH code, though it does turn a cmp/pz + rotate
    through carry into a rotate + xor with immediate.  That may be a latency
    win on the SH; I really don't know.
    
    Shreya did the bulk of the work here.  My contributions were the sister
    pattern, which handles the XOR on the other operand, and the testcase
    development.
    
    Bootstrapped and regression tested on x86 & riscv.  Also tested across the
    various embedded targets without any regressions.
    
            PR target/121778
    gcc/
            * match.pd: Add pattern to recognize rotate with one or more
            bits flipped via xor.
            * config/sh/sh.md (*rotcl): New variant which handles the output
            we get after the match.pd change above.
    
    gcc/testsuite/
            * gcc.target/riscv/pr121778.c: New test.
    
    Co-Authored-By: Jeff Law <[email protected]>
    (cherry picked from commit 4fbc0bbc03162f3962ea79bac29d36952867c90f)

Diff:
---
 gcc/config/sh/sh.md                       | 19 +++++++
 gcc/match.pd                              | 24 ++++++++
 gcc/testsuite/gcc.target/riscv/pr121778.c | 94 +++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+)

diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 65b353da56e1..f8003133e954 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -3271,6 +3271,25 @@
   operands[3] = get_t_reg_rtx ();
 })
 
+(define_insn_and_split "*rotcl"
+  [(set (match_operand:SI 0 "arith_reg_dest")
+       (xor:SI (rotate:SI (match_operand:SI 1 "arith_reg_operand")
+                          (const_int 1))
+               (const_int 1)))
+   (clobber (reg:SI T_REG))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+                  (ior:SI (ashift:SI (match_dup 1) (const_int 1))
+                          (and:SI (match_dup 3) (const_int 1))))
+             (clobber (reg:SI T_REG))])]
+{
+  rtx t = gen_rtx_GE (SImode, operands[1], const0_rtx);
+  sh_split_treg_set_expr (t, curr_insn);
+  operands[3] = get_t_reg_rtx ();
+})
+
 (define_insn_and_split "*rotcl"
   [(set (match_operand:SI 0 "arith_reg_dest")
        (ior:SI (and:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand")
diff --git a/gcc/match.pd b/gcc/match.pd
index 50e4d72bbfcd..b569653c7ba1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -11451,3 +11451,27 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* In this case the XOR flips bits that originate from the result of the
+   right shift and do not impact the result of the left shift.   We can
+   reassociate the XOR to work on the final result and simplify the rest
+   to a rotate.  */
+(simplify
+  (bit_ior:c (lshift @0 INTEGER_CST@1)
+            (bit_xor (rshift @2 INTEGER_CST@3) INTEGER_CST@4))
+   (if (((~((HOST_WIDE_INT_1U << tree_to_uhwi (@1)) - 1)) & tree_to_uhwi (@4)) == 0
+        && (tree_to_uhwi (@1) + tree_to_uhwi (@3)) == TYPE_PRECISION (type)
+        && TYPE_UNSIGNED (type)
+        && @0 == @2)
+    (bit_xor (rrotate @0 @3) @4)))
+
+/* Similarly, but in this case the XOR flips bits that originate from the
+   result of the left shift.  */
+(simplify
+  (bit_ior:c (bit_xor (lshift @0 INTEGER_CST@1) INTEGER_CST@2)
+            (rshift @3 INTEGER_CST@4))
+   (if ((((((HOST_WIDE_INT_1U << tree_to_uhwi (@1)) - 1)) & tree_to_uhwi (@2)) == 0)
+        && (tree_to_uhwi (@1) + tree_to_uhwi (@4)) == TYPE_PRECISION (type)
+        && TYPE_UNSIGNED (type)
+        && @0 == @3)
+    (bit_xor (rrotate @0 @4) @2)))
diff --git a/gcc/testsuite/gcc.target/riscv/pr121778.c b/gcc/testsuite/gcc.target/riscv/pr121778.c
new file mode 100644
index 000000000000..87da9c3cd962
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr121778.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcb -mabi=lp64d" { target rv64} } */
+/* { dg-options "-O2 -march=rv32gcb -mabi=ilp32" { target rv32} } */
+
+/* We need to adjust the constant so this works for rv32 and rv64.  */
+#if __riscv_xlen == 32
+#define ONE 1U
+#define TYPE unsigned int
+#else
+#define ONE 1UL
+#define TYPE unsigned long
+#endif
+
+#define F1(C) TYPE test_01##C (TYPE a) { return (a << (__riscv_xlen - C)) | ((a >> C) ^ 1); }
+#define F2(C) TYPE test_02##C (TYPE a) { return ((a >> (__riscv_xlen - C)) ^ 1) | (a << C); }
+#define F3(C) TYPE test_03##C (TYPE a) { return ((a << (__riscv_xlen - C)) ^ (ONE << (__riscv_xlen - 1))) | (a >> C); }
+#define F4(C) TYPE test_04##C (TYPE a) { return (a >> (__riscv_xlen - C)) | ((a << C) ^ (ONE << (__riscv_xlen - 1))); }
+
+#define F(C) F1(C) F2(C) F3(C) F4(C)
+
+
+F (1)
+F (2)
+F (3)
+F (4)
+F (5)
+F (6)
+F (7)
+F (8)
+F (9)
+F (10)
+F (11)
+F (12)
+F (13)
+F (14)
+F (15)
+F (16)
+F (17)
+F (18)
+F (19)
+F (20)
+F (21)
+F (22)
+F (23)
+F (24)
+F (25)
+F (26)
+F (27)
+F (28)
+F (29)
+F (30)
+F (31)
+#if __riscv_xlen == 64
+F (32)
+F (33)
+F (34)
+F (35)
+F (36)
+F (37)
+F (38)
+F (39)
+F (40)
+F (41)
+F (42)
+F (43)
+F (44)
+F (45)
+F (46)
+F (47)
+F (48)
+F (49)
+F (50)
+F (51)
+F (52)
+F (53)
+F (54)
+F (55)
+F (56)
+F (57)
+F (58)
+F (59)
+F (60)
+F (61)
+F (62)
+F (63)
+
+/* { dg-final { scan-assembler-times "\trori" 252 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "\txori" 126 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "\tbinv" 126 { target { rv64 } } } } */
+
+/* { dg-final { scan-assembler-times "\trori" 124 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "\txori" 62 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "\tbinv" 62 { target { rv32 } } } } */
+#endif
