The following test:

--cut here--
int test (void)
{
  unsigned int sum = 0;

  for (int i = 0; i < 4; i++)
    {
      unsigned int val;

      asm ("magic %0" : "=r" (val) : : "memory");
      sum += val;
    }

  return sum;
}
--cut here--

compiles on x86_64 with -O2 -funroll-all-loops to nonsensical code
where over-eager CSE combines the non-volatile asm despite the fact
that it has a memory clobber, which gcc documentation states means:

     The "memory" clobber tells the compiler that the assembly code
     performs memory reads or writes to items other than those listed
     in the input and output operands (for example, accessing the
     memory pointed to by one of the input parameters).

so combining the four identical asm statements into one seems to be
actively buggy. The inline asm may not be marked volatile, but it
does clearly tell the compiler that it does memory reads OR WRITES
to operands other than those listed. Which would on the face of it
make the CSE invalid.

The patch avoids simplifications of asms with memory clobber
in the postreload.cc/reload_cse_simplify function.

The testcase now compiles to the expected code:

test:
        magic %eax
        magic %edx
        addl    %edx, %eax
        magic %ecx
        addl    %ecx, %eax
        magic %esi
        addl    %esi, %eax
        ret

    PR rtl-optimization/111901

gcc/ChangeLog:

    * postreload.cc (reload_cse_simplify): Avoid simplification of
    asms with memory clobber.

gcc/testsuite/ChangeLog:

    * gcc.dg/pr111901.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for mainline?

Uros.
diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index 487aa8aad05..493f2d62cca 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -132,12 +132,28 @@ reload_cse_simplify (rtx_insn *insn, rtx testreg)
         we don't try to substitute values for them.  */
       if (asm_noperands (body) >= 0)
        {
+         bool mem_clobber_seen = false;
+
          for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
            {
              rtx part = XVECEXP (body, 0, i);
-             if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0)))
-               cselib_invalidate_rtx (XEXP (part, 0));
+             if (GET_CODE (part) == CLOBBER)
+               switch (GET_CODE (XEXP (part, 0)))
+                 {
+                 case REG:
+                   cselib_invalidate_rtx (XEXP (part, 0));
+                   break;
+                 case MEM:
+                   if (GET_CODE (XEXP (XEXP (part, 0), 0)) == SCRATCH)
+                     mem_clobber_seen = true;
+                   break;
+                 default:
+                   gcc_unreachable ();
+                 }
            }
+
+         if (mem_clobber_seen)
+           return false;
        }
 
       /* If every action in a PARALLEL is a noop, we can delete
diff --git a/gcc/testsuite/gcc.dg/pr111901.c b/gcc/testsuite/gcc.dg/pr111901.c
new file mode 100644
index 00000000000..30a4a30056e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr111901.c
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/111901 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops" } */
+
+int test (void)
+{
+  unsigned int sum = 0;
+
+  for (int i = 0; i < 4; i++)
+    {
+      unsigned int val;
+
+      asm ("magic %0" : "=r" (val) : : "memory");
+      sum += val;
+    }
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler-times "magic" 4 } } */

Reply via email to