In this PR we're getting code like this out of the gimple optimizers:
_1 = a_4(D) << 63;
_2 = a_4(D) >> 1;
_3 = _2 ^ 1;
_5 = _1 | _3;
Note the XOR in that sequence. It spoils our ability to recognize the
rotation. As a result we get code like this for rv64gcb:
srli a5,a0,1
xori a5,a5,1
slli a0,a0,63
or a0,a5,a0
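For reference, the C source shape that produces this is essentially the F1
macro from the new testcase; as an illustrative sketch (the function name and
the use of a 64-bit unsigned long are just for exposition):

unsigned long
rot1_flip_bit0 (unsigned long a)
{
  /* Rotate right by 1 with bit 0 of the result flipped, written as
     separate shifts plus an xor.  */
  return (a << 63) | ((a >> 1) ^ 1);
}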
We can reassociate the operations when the XOR only flips bits resulting
from the right or left shift, but not both. So after reassociation in
gimple we get:
_1 = a_2(D) r>> 1;
_3 = _1 ^ 1;
Which results in:
rori a0,a0,1
xori a0,a0,1
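In C terms (same illustrative 64-bit type as above) the reassociated form
corresponds to:

unsigned long
rot1_flip_bit0_reassoc (unsigned long a)
{
  /* The two shifts now pair up into a rotate and the xor is applied to
     the rotated value rather than just the right-shift half.  */
  return ((a >> 1) | (a << 63)) ^ 1;
}

The step is valid because the xor constant (1) only touches bit 0, which the
left shift contributes nothing to.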
We don't bother with the transformation when the XOR flips a bit that is
known to be zero (i.e., a high bit of the right-shift result or a low bit of
the left-shift result). For those cases we already figure out that the XOR
is really just an IOR and the right things already "just happen".
This triggered some code generation changes on the SH (not surprising
because this BZ was derived from an older SH BZ). It doesn't seem to
significantly improve the SH code, though it does replace a cmp/pz + rotate
through carry with a rotate + xor with immediate. That may be a
latency win on the SH; I really don't know.
Shreya did the bulk of the work here. My contributions were the sister
pattern (which has the XOR on the other operand) and testcase development.
Bootstrapped and regression tested on x86 & riscv. Also tested across
the various embedded targets without any regressions.
OK for the trunk?
jeff
PR target/121778
gcc/
* match.pd: Add pattern to recognize rotate with one or more
bits flipped via xor.
gcc/testsuite/
* gcc.target/riscv/pr121778.c: New test.
* gcc.target/sh/pr59533-1.c: Update expected output.
diff --git a/gcc/match.pd b/gcc/match.pd
index 05c8b59eb9e8..365688f58da3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -12072,3 +12072,27 @@ and,
(simplify
(IFN_VEC_SHL_INSERT (vec_duplicate@1 @0) @0)
@1)
+
+/* In this case the XOR flips bits that originate from the result of the
+ right shift and do not impact the result of the left shift. We can
+ reassociate the XOR to work on the final result and simplify the rest
+ to a rotate. */
+(simplify
+ (bit_ior:c (lshift @0 INTEGER_CST@1)
+ (bit_xor (rshift @2 INTEGER_CST@3) INTEGER_CST@4))
+ (if (((~((HOST_WIDE_INT_1U << tree_to_uhwi (@1)) - 1)) & tree_to_uhwi (@4)) == 0
+ && (tree_to_uhwi (@1) + tree_to_uhwi (@3)) == TYPE_PRECISION (type)
+ && TYPE_UNSIGNED (type)
+ && @0 == @2)
+ (bit_xor (rrotate @0 @3) @4)))
+
+/* Similarly, but in this case the XOR flips bits that originate from the
+ result of the left shift. */
+(simplify
+ (bit_ior:c (bit_xor (lshift @0 INTEGER_CST@1) INTEGER_CST@2)
+ (rshift @3 INTEGER_CST@4))
+ (if ((((((HOST_WIDE_INT_1U << tree_to_uhwi (@1)) - 1)) & tree_to_uhwi (@2)) == 0)
+ && (tree_to_uhwi (@1) + tree_to_uhwi (@4)) == TYPE_PRECISION (type)
+ && TYPE_UNSIGNED (type)
+ && @0 == @3)
+ (bit_xor (rrotate @0 @4) @2)))
diff --git a/gcc/testsuite/gcc.target/riscv/pr121778.c b/gcc/testsuite/gcc.target/riscv/pr121778.c
new file mode 100644
index 000000000000..87da9c3cd962
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr121778.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcb -mabi=lp64d" { target rv64} } */
+/* { dg-options "-O2 -march=rv32gcb -mabi=ilp32" { target rv32} } */
+
+/* We need to adjust the constant so this works for rv32 and rv64. */
+#if __riscv_xlen == 32
+#define ONE 1U
+#define TYPE unsigned int
+#else
+#define ONE 1UL
+#define TYPE unsigned long
+#endif
+
+#define F1(C) TYPE test_01##C (TYPE a) { return (a << (__riscv_xlen - C)) | ((a >> C) ^ 1); }
+#define F2(C) TYPE test_02##C (TYPE a) { return ((a >> (__riscv_xlen - C)) ^ 1) | (a << C); }
+#define F3(C) TYPE test_03##C (TYPE a) { return ((a << (__riscv_xlen - C)) ^ (ONE << (__riscv_xlen - 1))) | (a >> C); }
+#define F4(C) TYPE test_04##C (TYPE a) { return (a >> (__riscv_xlen - C)) | ((a << C) ^ (ONE << (__riscv_xlen - 1))); }
+
+#define F(C) F1(C) F2(C) F3(C) F4(C)
+
+
+F (1)
+F (2)
+F (3)
+F (4)
+F (5)
+F (6)
+F (7)
+F (8)
+F (9)
+F (10)
+F (11)
+F (12)
+F (13)
+F (14)
+F (15)
+F (16)
+F (17)
+F (18)
+F (19)
+F (20)
+F (21)
+F (22)
+F (23)
+F (24)
+F (25)
+F (26)
+F (27)
+F (28)
+F (29)
+F (30)
+F (31)
+#if __riscv_xlen == 64
+F (32)
+F (33)
+F (34)
+F (35)
+F (36)
+F (37)
+F (38)
+F (39)
+F (40)
+F (41)
+F (42)
+F (43)
+F (44)
+F (45)
+F (46)
+F (47)
+F (48)
+F (49)
+F (50)
+F (51)
+F (52)
+F (53)
+F (54)
+F (55)
+F (56)
+F (57)
+F (58)
+F (59)
+F (60)
+F (61)
+F (62)
+F (63)
+
+/* { dg-final { scan-assembler-times "\trori" 252 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "\txori" 126 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "\tbinv" 126 { target { rv64 } } } } */
+
+/* { dg-final { scan-assembler-times "\trori" 124 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "\txori" 62 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "\tbinv" 62 { target { rv32 } } } } */
+#endif
diff --git a/gcc/testsuite/gcc.target/sh/pr59533-1.c b/gcc/testsuite/gcc.target/sh/pr59533-1.c
index b0469859df5b..9d0dec91f7cc 100644
--- a/gcc/testsuite/gcc.target/sh/pr59533-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr59533-1.c
@@ -4,11 +4,12 @@
/* { dg-final { scan-assembler-times "shll" 1 } } */
/* { dg-final { scan-assembler-times "movt" 5 } } */
-/* { dg-final { scan-assembler-times "rotcl" 1 } } */
+/* { dg-final { scan-assembler-times "rotl" 1 } } */
/* { dg-final { scan-assembler-times "and" 3 } } */
/* { dg-final { scan-assembler-times "extu.b" 5 } } */
-/* { dg-final { scan-assembler-times "cmp/pz" 27 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "cmp/pz" 26 { target { ! sh2a } } } } */
+/* { dg-final { scan-assembler-times "xor" 1 { target { ! sh2a } } } } */
/* { dg-final { scan-assembler-times "addc" 4 { target { ! sh2a } } } } */
/* { dg-final { scan-assembler-times "subc" 16 { target { ! sh2a } } } } */