So this is the target-independent work to finish resolving PR80770.
It's a combination of Shreya's efforts and my own.
To recap, the basic idea is we want to simplify RTL blobs which
ultimately are just flipping a bit. Consider:
(set (reg:DI 153)
(ior:DI (and:DI (reg:DI 140 [ *s_4(D) ])
(const_int 254 [0xfe]))
(and:DI (not:DI (reg:DI 140 [ *s_4(D) ]))
(const_int 1 [0x1]))))
The first operand of the IOR clears the low bit of the source register
leaving everything else unchanged. The second operand of the IOR clears
everything but the low bit and flips the low bit. When we IOR those
together we get the original value with the lowest bit flipped. The key
is to realize we have the same pseudo in both arms and there are no bits
in common for the constants. So this works for arbitrary bit(s) as
long as the constants have the right form.
That gets us good code on riscv and almost certainly helps other
targets. There is another form which shows up on the H8 and possibly
other targets with sub-word arithmetic. op0 and op1 are respectively:
(gdb) p debug_rtx (op0)
(and:QI (reg:QI 24 [ *s_4(D) ])
(const_int 127 [0x7f]))
$1 = void
(gdb) p debug_rtx (op1)
(plus:QI (and:QI (reg:QI 24 [ *s_4(D) ])
(const_int -128 [0xffffffffffffff80]))
(const_int -128 [0xffffffffffffff80]))
$2 = void
Note we're in QImode. op1 just flips the highest QImode bit. If there
are carry-outs, we don't really care about them. The net is we can
capture that case on the H8 by verifying this form flips the highest bit
for the given mode. Otherwise the carry-outs are relevant and our
transformation is incorrect.
Plan is to commit Friday. While it has been tested with the usual
bootstraps as well as testing on various cross platforms, I'm more
comfortable giving folks time to take a looksie to see if Shreya or I
missed anything critical.
For the testcase in question before/afters look like this:
x86:
movzbl (%rdi), %eax
movl %eax, %edx
andl $-2, %eax
andl $1, %edx
xorl $1, %edx
orl %edx, %eax
movb %al, (%rdi)
Turns into:
xorb $1, (%rdi)
RISC-V:
lbu a5,0(a0)
andi a4,a5,1
xori a4,a4,1
andi a5,a5,-2
or a5,a5,a4
sb a5,0(a0)
Turns into:
lbu a5,0(a0)
xori a5,a5,1
sb a5,0(a0)
Jeff
PR rtl-optimization/80770
gcc/
* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
Identify and optimize cases where an IOR is just a bit flip.
gcc/testsuite/
* gcc.target/riscv/pr80770.c: New test.
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index bf625cdaf608..365bb9db1930 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3897,6 +3897,101 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
&& negated_ops_p (XEXP (op0, 0), op1))
return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+ /* (ior (and A C1) (and (not A) C2)) can be converted
+ into (and (xor A C2) (C1 + C2)) when there are no bits
+ in common between C1 and C2. */
+ if (GET_CODE (op0) == AND
+ && GET_CODE (op1) == AND
+ && GET_CODE (XEXP (op1, 0)) == NOT
+ && rtx_equal_p (XEXP (op0, 0), XEXP (XEXP (op1, 0), 0))
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (XEXP (op1, 1))
+ && (INTVAL (XEXP (op0, 1)) & INTVAL (XEXP (op1, 1))) == 0)
+ {
+ rtx c = GEN_INT (INTVAL (XEXP (op0, 1)) + INTVAL (XEXP (op1, 1)));
+
+ tem = simplify_gen_binary (XOR, mode, XEXP (op0, 0), XEXP (op1, 1));
+ if (tem)
+ {
+ tem = simplify_gen_binary (AND, mode, tem, c);
+ if (tem)
+ return tem;
+ }
+ }
+
+ /* Same thing, but operand order is reversed for the outer IOR. */
+ if (GET_CODE (op0) == AND
+ && GET_CODE (op1) == AND
+ && GET_CODE (XEXP (op0, 0)) == NOT
+ && rtx_equal_p (XEXP (op1, 0), XEXP (XEXP (op0, 0), 0))
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (XEXP (op1, 1))
+ && (INTVAL (XEXP (op0, 1)) & INTVAL (XEXP (op1, 1))) == 0)
+ {
+ rtx c = GEN_INT (INTVAL (XEXP (op0, 1)) + INTVAL (XEXP (op1, 1)));
+
+ tem = simplify_gen_binary (XOR, mode, XEXP (op1, 0), XEXP (op0, 1));
+ if (tem)
+ {
+ tem = simplify_gen_binary (AND, mode, tem, c);
+ if (tem)
+ return tem;
+ }
+ }
+
+ /* Another variant seen on some backends, particularly those with
+ sub-word operations. For these cases we have to know there is no
+ carry from the PLUS into relevant bits. In practice that means
+ it's only valid for the uppermost bit. */
+ if (GET_CODE (op0) == AND
+ && GET_CODE (op1) == PLUS
+ && GET_CODE (XEXP (op1, 0)) == AND
+ && rtx_equal_p (XEXP (op0, 0), XEXP (XEXP (op1, 0), 0))
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (XEXP (op1, 1))
+ && CONST_INT_P (XEXP (XEXP (op1, 0), 1))
+ && INTVAL (XEXP (op1, 1)) == INTVAL (XEXP (XEXP (op1, 0), 1))
+ && GET_MODE_BITSIZE (GET_MODE (op1)).is_constant ()
+ && ((INTVAL (XEXP (op1, 1)) & GET_MODE_MASK (GET_MODE (op1)))
+ == HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (GET_MODE (op1)).to_constant () - 1))
+ && (INTVAL (XEXP (op0, 1)) & INTVAL (XEXP (op1, 1))) == 0)
+ {
+ rtx c = GEN_INT (INTVAL (XEXP (op0, 1)) + INTVAL (XEXP (op1, 1)));
+
+ tem = simplify_gen_binary (XOR, mode, XEXP (op0, 0), XEXP (op1, 1));
+ if (tem)
+ {
+ tem = simplify_gen_binary (AND, mode, tem, c);
+ if (tem)
+ return tem;
+ }
+ }
+
+ /* And its variant with the operands of the outer IOR reversed. */
+ if (GET_CODE (op1) == AND
+ && GET_CODE (op0) == PLUS
+ && GET_CODE (XEXP (op0, 0)) == AND
+ && rtx_equal_p (XEXP (op1, 0), XEXP (XEXP (op0, 0), 0))
+ && CONST_INT_P (XEXP (op1, 1))
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (XEXP (XEXP (op0, 0), 1))
+ && INTVAL (XEXP (op0, 1)) == INTVAL (XEXP (XEXP (op0, 0), 1))
+ && GET_MODE_BITSIZE (GET_MODE (op0)).is_constant ()
+ && ((INTVAL (XEXP (op0, 1)) & GET_MODE_MASK (GET_MODE (op0)))
+ == HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (GET_MODE (op0)).to_constant () - 1))
+ && (INTVAL (XEXP (op1, 1)) & INTVAL (XEXP (op0, 1))) == 0)
+ {
+ rtx c = GEN_INT (INTVAL (XEXP (op1, 1)) + INTVAL (XEXP (op0, 1)));
+
+ tem = simplify_gen_binary (XOR, mode, XEXP (op1, 0), XEXP (op0, 1));
+ if (tem)
+ {
+ tem = simplify_gen_binary (AND, mode, tem, c);
+ if (tem)
+ return tem;
+ }
+ }
+
tem = simplify_with_subreg_not (code, mode, op0, op1);
if (tem)
return tem;
diff --git a/gcc/testsuite/gcc.target/riscv/pr80770.c b/gcc/testsuite/gcc.target/riscv/pr80770.c
new file mode 100644
index 000000000000..4dafe3955f05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr80770.c
@@ -0,0 +1,150 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=gnu99" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" } } */
+
+
+struct S {
+ _Bool b0: 1;
+ _Bool b1: 1;
+ _Bool b2: 1;
+ _Bool b3: 1;
+ _Bool b4: 1;
+ _Bool b5: 1;
+ _Bool b6: 1;
+ _Bool b7: 1;
+ _Bool b8: 1;
+ _Bool b9: 1;
+ _Bool b10: 1;
+ _Bool b11: 1;
+ _Bool b12: 1;
+ _Bool b13: 1;
+ _Bool b14: 1;
+ _Bool b15: 1;
+ _Bool b16: 1;
+ _Bool b17: 1;
+ _Bool b18: 1;
+ _Bool b19: 1;
+ _Bool b20: 1;
+ _Bool b21: 1;
+ _Bool b22: 1;
+ _Bool b23: 1;
+ _Bool b24: 1;
+ _Bool b25: 1;
+ _Bool b26: 1;
+ _Bool b27: 1;
+ _Bool b28: 1;
+ _Bool b29: 1;
+ _Bool b30: 1;
+ _Bool b31: 1;
+ _Bool b32: 1;
+ _Bool b33: 1;
+ _Bool b34: 1;
+ _Bool b35: 1;
+ _Bool b36: 1;
+ _Bool b37: 1;
+ _Bool b38: 1;
+ _Bool b39: 1;
+ _Bool b40: 1;
+ _Bool b41: 1;
+ _Bool b42: 1;
+ _Bool b43: 1;
+ _Bool b44: 1;
+ _Bool b45: 1;
+ _Bool b46: 1;
+ _Bool b47: 1;
+ _Bool b48: 1;
+ _Bool b49: 1;
+ _Bool b50: 1;
+ _Bool b51: 1;
+ _Bool b52: 1;
+ _Bool b53: 1;
+ _Bool b54: 1;
+ _Bool b55: 1;
+ _Bool b56: 1;
+ _Bool b57: 1;
+ _Bool b58: 1;
+ _Bool b59: 1;
+ _Bool b60: 1;
+ _Bool b61: 1;
+ _Bool b62: 1;
+ _Bool b63: 1;
+};
+
+#define T(N) void fb##N (struct S *s) { s->b##N = !s->b##N; }
+
+T(0)
+T(1)
+T(2)
+T(3)
+T(4)
+T(5)
+T(6)
+T(7)
+T(8)
+T(9)
+T(10)
+T(11)
+T(12)
+T(13)
+T(14)
+T(15)
+T(16)
+T(17)
+T(18)
+T(19)
+T(20)
+T(21)
+T(22)
+T(23)
+T(24)
+T(25)
+T(26)
+T(27)
+T(28)
+T(29)
+T(30)
+T(31)
+#if __riscv_xlen == 64
+T(32)
+T(33)
+T(34)
+T(35)
+T(36)
+T(37)
+T(38)
+T(39)
+T(40)
+T(41)
+T(42)
+T(43)
+T(44)
+T(45)
+T(46)
+T(47)
+T(48)
+T(49)
+T(50)
+T(51)
+T(52)
+T(53)
+T(54)
+T(55)
+T(56)
+T(57)
+T(58)
+T(59)
+T(60)
+T(61)
+T(62)
+T(63)
+#endif
+
+/* { dg-final { scan-assembler-times "lbu\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "lbu\t" 32 { target rv32 } } } */
+
+/* { dg-final { scan-assembler-times "xori\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "xori\t" 32 { target rv32 } } } */
+
+
+/* { dg-final { scan-assembler-times "sb\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "sb\t" 32 { target rv32 } } } */