Here's the final form of the rest of the pr80770 work for the archives.  It is functionally the same as the last version, but the simplifier has been moved into its own function to avoid code duplication.

Jeff

commit 684d385720cd5d25df8dc69c5281fc0fb9c3bebe
Author: Shreya Munnangi <[email protected]>
Date:   Sun May 10 21:37:29 2026 -0600

    [RISC-V][PR rtl-optimization/80770] Simplify bit flipping operations down to xor
    
    So this is the target independent work to finish resolving pr80770.  It's a
    combination of Shreya's efforts and my own.
    
    To recap, the basic idea is we want to simplify RTL blobs which ultimately
    are just flipping a bit.  Consider:
    
    > (set (reg:DI 153)
    >      (ior:DI (and:DI (reg:DI 140 [ *s_4(D) ])
    >              (const_int 254 [0xfe]))
    >          (and:DI (not:DI (reg:DI 140 [ *s_4(D) ]))
    >              (const_int 1 [0x1]))))
    
    The first operand of the IOR clears the low bit of the source register
    leaving everything else unchanged.  The second operand of the IOR clears
    everything but the low bit and flips the low bit.  When we IOR those
    together we get the original value with the lowest bit flipped.  The key is
    to realize we have the same pseudo in both arms and there are no bits in
    common for the constants.  So this works for arbitrary bits as long as the
    constants have the right form.
    
    That gets us good code on riscv and almost certainly helps other targets.
    There is another form which shows up on the H8 and possibly other targets
    with sub-word arithmetic.  op0 and op1 are respectively:
    
    > (gdb) p debug_rtx (op0)
    > (and:QI (reg:QI 24 [ *s_4(D) ])
    >     (const_int 127 [0x7f]))
    > $1 = void
    > (gdb) p debug_rtx (op1)
    > (plus:QI (and:QI (reg:QI 24 [ *s_4(D) ])
    >         (const_int -128 [0xffffffffffffff80]))
    >     (const_int -128 [0xffffffffffffff80]))
    > $2 = void
    
    Note we're in QImode.  op1 just flips the highest QImode bit.  If there are
    carry-outs, we don't really care about them.  The net is we can capture that
    case on the H8 by verifying this form flips the highest bit for the given
    mode.  Otherwise the carry-outs are relevant and our transformation is
    incorrect.
    
    Plan is to commit Friday.  While it has been tested with the usual
    bootstraps as well as testing on various cross platforms, I'm more
    comfortable giving folks time to take a looksie to see if Shreya or I missed
    anything critical.
    
    For the testcase in question before/afters look like this:
    
    x86:
            movzbl  (%rdi), %eax
            movl    %eax, %edx
            andl    $-2, %eax
            andl    $1, %edx
            xorl    $1, %edx
            orl     %edx, %eax
            movb    %al, (%rdi)
    
      Turns into:
    
            xorb    $1, (%rdi)
    
    RISC-V:
    
            lbu     a5,0(a0)
            andi    a4,a5,1
            xori    a4,a4,1
            andi    a5,a5,-2
            or      a5,a5,a4
            sb      a5,0(a0)
    
      Turns into:
    
            lbu     a5,0(a0)
            xori    a5,a5,1
            sb      a5,0(a0)
    
            PR rtl-optimization/80770
    gcc/
            * rtl.h (simplify_context::simplify_ior_with_common_term): Add
            new method.
            * simplify-rtx.cc (simplify_context::simplify_ior_with_common_term):
            New method.
            (simplify_context::simplify_binary_operation_1): Use new method.
    
    gcc/testsuite/
    
            * gcc.target/riscv/pr80770.c: New test.
            * gcc.target/riscv/pr80770-2.c: New test.
            * gcc.target/h8300/pr80770.c: New test.
            * gcc.target/h8300/pr80770-2.c: New test.
    
    Co-authored-by: Jeff Law  <[email protected]>

diff --git a/gcc/rtl.h b/gcc/rtl.h
index c1051f48984..d60587dc5ce 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3496,6 +3496,7 @@ public:
                                  rtx, rtx, rtx);
   rtx simplify_relational_operation (rtx_code, machine_mode, machine_mode,
                                     rtx, rtx);
+  rtx simplify_ior_with_common_term (machine_mode, rtx, rtx);
   rtx simplify_subreg (machine_mode, rtx, machine_mode, poly_uint64);
 
   rtx lowpart_subreg (machine_mode, rtx, machine_mode);
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index ee3d9ec3208..392476754a0 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -2734,6 +2734,84 @@ simplify_context::simplify_logical_relational_operation 
(rtx_code code,
   return simplify_gen_relational (code, mode, VOIDmode, op0, op1);
 }
 
+/* We are going to IOR together OP0/OP1.  If there is a common term in OP0/OP1
+   then we may be able to simplify the expression.  We're primarily trying to
+   simplify down to IOR/XOR expression right now, but there may be other
+   simplifications we can do in the future.
+
+   MODE is the mode of the IOR.  Only one operand order is examined here, so
+   callers should retry with OP0 and OP1 swapped.
+
+   Return the simplified expression or NULL_RTX if no simplification was
+   possible.  */
+rtx
+simplify_context::simplify_ior_with_common_term (machine_mode mode,
+                                                 rtx op0, rtx op1)
+{
+  /* (ior X (plus/xor X C)) can be simplified into (ior X C) when
+     X and C have no bits in common.  */
+  if ((GET_CODE (op1) == PLUS || GET_CODE (op1) == XOR)
+      && rtx_equal_p (op0, XEXP (op1, 0))
+      && ((nonzero_bits (op0, GET_MODE (op0))
+         & nonzero_bits (XEXP (op1, 1), GET_MODE (op1))) == 0)
+      && !side_effects_p (op1))
+    return simplify_gen_binary (IOR, mode, op0, XEXP (op1, 1));
+
+  /* (ior (and A C1) (and (not A) C2)) can be converted
+     into (and (xor A C2) (C1 | C2)) when there are no bits
+     in common between C1 and C2.
+
+     The result evaluates A once rather than twice, so A must not have
+     side effects.  */
+  if (GET_CODE (op0) == AND
+      && GET_CODE (op1) == AND
+      && GET_CODE (XEXP (op1, 0)) == NOT
+      && rtx_equal_p (XEXP (op0, 0), XEXP (XEXP (op1, 0), 0))
+      && !side_effects_p (XEXP (op0, 0))
+      && CONST_INT_P (XEXP (op0, 1))
+      && CONST_INT_P (XEXP (op1, 1))
+      && (INTVAL (XEXP (op0, 1)) & INTVAL (XEXP (op1, 1))) == 0)
+    {
+      /* C1 and C2 are disjoint, so IOR gives the same value as PLUS.  */
+      rtx c = GEN_INT (INTVAL (XEXP (op0, 1)) | INTVAL (XEXP (op1, 1)));
+      rtx tem = simplify_gen_binary (XOR, mode, XEXP (op0, 0), XEXP (op1, 1));
+      return simplify_gen_binary (AND, mode, tem, c);
+    }
+
+  /* Another variant seen on some targets, particularly those with
+     sub-word operations.
+
+     (ior (and A C1) (plus (and A C2) C2)) can be simplified into
+     (and (xor A C2) (C1 | C2))
+
+     where C2 is the sign bit for A's mode.  So 0x80 for QI,
+     0x8000 for HI, etc.  In this case we know there is no carry
+     from the PLUS into relevant bits of the output.
+
+     val_signbit_p masks C2 to the mode and rejects modes wider than a
+     HOST_WIDE_INT, where shifting to build the sign bit would be
+     undefined.  */
+  if (GET_CODE (op0) == AND
+      && GET_CODE (op1) == PLUS
+      && GET_CODE (XEXP (op1, 0)) == AND
+      && rtx_equal_p (XEXP (op0, 0), XEXP (XEXP (op1, 0), 0))
+      && !side_effects_p (XEXP (op0, 0))
+      && CONST_INT_P (XEXP (op0, 1))
+      && CONST_INT_P (XEXP (op1, 1))
+      && CONST_INT_P (XEXP (XEXP (op1, 0), 1))
+      && INTVAL (XEXP (op1, 1)) == INTVAL (XEXP (XEXP (op1, 0), 1))
+      && val_signbit_p (GET_MODE (op1), INTVAL (XEXP (op1, 1)))
+      && (INTVAL (XEXP (op0, 1)) & INTVAL (XEXP (op1, 1))) == 0)
+    {
+      rtx c = GEN_INT (INTVAL (XEXP (op0, 1)) | INTVAL (XEXP (op1, 1)));
+      rtx tem = simplify_gen_binary (XOR, mode, XEXP (op0, 0), XEXP (op1, 1));
+      return simplify_gen_binary (AND, mode, tem, c);
+    }
+
+  return NULL_RTX;
+}
+
+
 /* Simplify a binary operation CODE with result mode MODE, operating on OP0
    and OP1.  Return 0 if no simplification is possible.
 
@@ -3900,6 +3978,19 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
          && negated_ops_p (XEXP (op0, 0), op1))
        return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      /* op0/op1 may have a common term which in turn may allow simplification
+        of the outer IOR.  There are likely other cases we should
+        handle for the outer code as well as the form of the operands.  */
+      tem = simplify_ior_with_common_term (mode, op0, op1);
+      if (tem)
+       return tem;
+
+      /* IOR is commutative and we can't rely on canonicalization at this
+        point, so try again to simplify with the operands reversed.  */
+      tem = simplify_ior_with_common_term (mode, op1, op0);
+      if (tem)
+       return tem;
+
       tem = simplify_with_subreg_not (code, mode, op0, op1);
       if (tem)
        return tem;
diff --git a/gcc/testsuite/gcc.target/h8300/pr80770-2.c 
b/gcc/testsuite/gcc.target/h8300/pr80770-2.c
new file mode 100644
index 00000000000..d2b491ed24e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/h8300/pr80770-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=gnu99 -mint32" } */
+
+
+int foop(int x) { x &= 0xf; x |= (x + 0x80); return x; }
+int foox(int x) { x &= 0xf; x |= (x ^ 0x80); return x; }
+
+/* { dg-final { scan-assembler-not "add" } } */
+/* { dg-final { scan-assembler-not "xor" } } */
+/* { dg-final { scan-assembler-times "and" 2 } } */
+/* { dg-final { scan-assembler-times "or" 2 } } */
diff --git a/gcc/testsuite/gcc.target/h8300/pr80770.c 
b/gcc/testsuite/gcc.target/h8300/pr80770.c
new file mode 100644
index 00000000000..bccf2ff66c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/h8300/pr80770.c
@@ -0,0 +1,75 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=gnu99 -mint32" } */
+
+
+struct S {
+  _Bool b0: 1;
+  _Bool b1: 1;
+  _Bool b2: 1;
+  _Bool b3: 1;
+  _Bool b4: 1;
+  _Bool b5: 1;
+  _Bool b6: 1;
+  _Bool b7: 1;
+  _Bool b8: 1;
+  _Bool b9: 1;
+  _Bool b10: 1;
+  _Bool b11: 1;
+  _Bool b12: 1;
+  _Bool b13: 1;
+  _Bool b14: 1;
+  _Bool b15: 1;
+  _Bool b16: 1;
+  _Bool b17: 1;
+  _Bool b18: 1;
+  _Bool b19: 1;
+  _Bool b20: 1;
+  _Bool b21: 1;
+  _Bool b22: 1;
+  _Bool b23: 1;
+  _Bool b24: 1;
+  _Bool b25: 1;
+  _Bool b26: 1;
+  _Bool b27: 1;
+  _Bool b28: 1;
+  _Bool b29: 1;
+  _Bool b30: 1;
+  _Bool b31: 1;
+};
+
+#define T(N) void fb##N (struct S *s) { s->b##N = !s->b##N; }
+
+T(0)
+T(1)
+T(2)
+T(3)
+T(4)
+T(5)
+T(6)
+T(7)
+T(8)
+T(9)
+T(10)
+T(11)
+T(12)
+T(13)
+T(14)
+T(15)
+T(16)
+T(17)
+T(18)
+T(19)
+T(20)
+T(21)
+T(22)
+T(23)
+T(24)
+T(25)
+T(26)
+T(27)
+T(28)
+T(29)
+T(30)
+T(31)
+
+/* { dg-final { scan-assembler-times "xor\t|add.b\t|bnot\t" 32 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr80770-2.c 
b/gcc/testsuite/gcc.target/riscv/pr80770-2.c
new file mode 100644
index 00000000000..1514a49c560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr80770-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=gnu99" } */
+
+
+int foop(int x) { x &= 0xf; x |= (x + 0x80); return x; }
+int foox(int x) { x &= 0xf; x |= (x ^ 0x80); return x; }
+
+/* { dg-final { scan-assembler-not "add" } } */
+/* { dg-final { scan-assembler-not "xor" } } */
+/* { dg-final { scan-assembler-times "andi\t" 2 } } */
+/* { dg-final { scan-assembler-times "ori\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr80770.c 
b/gcc/testsuite/gcc.target/riscv/pr80770.c
new file mode 100644
index 00000000000..4dafe3955f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr80770.c
@@ -0,0 +1,150 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=gnu99" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" } } */
+
+
+struct S {
+  _Bool b0: 1;
+  _Bool b1: 1;
+  _Bool b2: 1;
+  _Bool b3: 1;
+  _Bool b4: 1;
+  _Bool b5: 1;
+  _Bool b6: 1;
+  _Bool b7: 1;
+  _Bool b8: 1;
+  _Bool b9: 1;
+  _Bool b10: 1;
+  _Bool b11: 1;
+  _Bool b12: 1;
+  _Bool b13: 1;
+  _Bool b14: 1;
+  _Bool b15: 1;
+  _Bool b16: 1;
+  _Bool b17: 1;
+  _Bool b18: 1;
+  _Bool b19: 1;
+  _Bool b20: 1;
+  _Bool b21: 1;
+  _Bool b22: 1;
+  _Bool b23: 1;
+  _Bool b24: 1;
+  _Bool b25: 1;
+  _Bool b26: 1;
+  _Bool b27: 1;
+  _Bool b28: 1;
+  _Bool b29: 1;
+  _Bool b30: 1;
+  _Bool b31: 1;
+  _Bool b32: 1;
+  _Bool b33: 1;
+  _Bool b34: 1;
+  _Bool b35: 1;
+  _Bool b36: 1;
+  _Bool b37: 1;
+  _Bool b38: 1;
+  _Bool b39: 1;
+  _Bool b40: 1;
+  _Bool b41: 1;
+  _Bool b42: 1;
+  _Bool b43: 1;
+  _Bool b44: 1;
+  _Bool b45: 1;
+  _Bool b46: 1;
+  _Bool b47: 1;
+  _Bool b48: 1;
+  _Bool b49: 1;
+  _Bool b50: 1;
+  _Bool b51: 1;
+  _Bool b52: 1;
+  _Bool b53: 1;
+  _Bool b54: 1;
+  _Bool b55: 1;
+  _Bool b56: 1;
+  _Bool b57: 1;
+  _Bool b58: 1;
+  _Bool b59: 1;
+  _Bool b60: 1;
+  _Bool b61: 1;
+  _Bool b62: 1;
+  _Bool b63: 1;
+};
+
+#define T(N) void fb##N (struct S *s) { s->b##N = !s->b##N; }
+
+T(0)
+T(1)
+T(2)
+T(3)
+T(4)
+T(5)
+T(6)
+T(7)
+T(8)
+T(9)
+T(10)
+T(11)
+T(12)
+T(13)
+T(14)
+T(15)
+T(16)
+T(17)
+T(18)
+T(19)
+T(20)
+T(21)
+T(22)
+T(23)
+T(24)
+T(25)
+T(26)
+T(27)
+T(28)
+T(29)
+T(30)
+T(31)
+#if __riscv_xlen == 64
+T(32)
+T(33)
+T(34)
+T(35)
+T(36)
+T(37)
+T(38)
+T(39)
+T(40)
+T(41)
+T(42)
+T(43)
+T(44)
+T(45)
+T(46)
+T(47)
+T(48)
+T(49)
+T(50)
+T(51)
+T(52)
+T(53)
+T(54)
+T(55)
+T(56)
+T(57)
+T(58)
+T(59)
+T(60)
+T(61)
+T(62)
+T(63)
+#endif
+
+/* { dg-final { scan-assembler-times "lbu\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "lbu\t" 32 { target rv32 } } } */
+
+/* { dg-final { scan-assembler-times "xori\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "xori\t" 32 { target rv32 } } } */
+
+
+/* { dg-final { scan-assembler-times "sb\t" 64 { target rv64 } } } */
+/* { dg-final { scan-assembler-times "sb\t" 32 { target rv32 } } } */

Reply via email to