In this PR, we have a few insns involved in two instruction combinations:

insn 16: set r100
insn 27: some calculation
insn 28: some calculation
insn 32: using r100
insn 33: using r100
insn 35: some calculation

Then we combine insns 27, 28 and 33, producing two output insns, As a result, insn 28 (i2) now obtains a use of r100. But insn 32, which is not involved in this combination, has the LOG_LINKS entry for that register, and we don't know that we need to update it. As a result, the second combination, involving regs 16, 32 and 35 (based on the remaining LOG_LINK for r100), produces incorrect code, as we don't realize there's a use of r100 before insn 32.

The following fixes it. Bootstrapped and tested on x86_64-linux, ok (on all branches)?


Bernd

	PR rtl-optimization/79910
	* combine.c (record_used_regs): New static function.
	(try_combine): Handle situations where there is an additional
	instruction between I2 and I3 which needs to have a LOG_LINK
	updated.

	PR rtl-optimization/79910
	* gcc.dg/torture/pr79910.c: New test.

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	(revision 245685)
+++ gcc/combine.c	(working copy)
@@ -2559,6 +2560,57 @@ can_split_parallel_of_n_reg_sets (rtx_in
   return true;
 }
 
+/* Set up a set of registers used in an insn.  Called through note_uses,
+   arguments as described for that function.  */
+
+static void
+record_used_regs (rtx *xptr, void *data)
+{
+  bitmap set = (bitmap)data;
+  int i, j;
+  enum rtx_code code;
+  const char *fmt;
+  rtx x = *xptr;
+
+  /* repeat is used to turn tail-recursion into iteration since GCC
+     can't do it when there's no return value.  */
+ repeat:
+  if (x == 0)
+    return;
+
+  code = GET_CODE (x);
+  if (REG_P (x))
+    {
+      unsigned regno = REGNO (x);
+      unsigned end_regno = END_REGNO (x);
+      while (regno < end_regno)
+	bitmap_set_bit (set, regno++);
+      return;
+    }
+
+  /* Recursively scan the operands of this expression.  */
+
+  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	{
+	  /* If we are about to do the last recursive call
+	     needed at this level, change it into iteration.
+	     This function is called enough to be worth it.  */
+	  if (i == 0)
+	    {
+	      x = XEXP (x, 0);
+	      goto repeat;
+	    }
+
+	  record_used_regs (&XEXP (x, i), data);
+	}
+      else if (fmt[i] == 'E')
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  record_used_regs (&XVECEXP (x, i, j), data);
+    }
+}
+
 /* Try to combine the insns I0, I1 and I2 into I3.
    Here I0, I1 and I2 appear earlier than I3.
    I0 and I1 can be zero; then we combine just I2 into I3, or I1 and I2 into
@@ -2742,6 +2794,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2,
 
   added_links_insn = 0;
 
+  auto_bitmap i2_regset, i3_regset, links_regset;
+  note_uses (&PATTERN (i2), record_used_regs, (bitmap)i2_regset);
+  note_uses (&PATTERN (i3), record_used_regs, (bitmap)i3_regset);
+  insn_link *ll;
+  FOR_EACH_LOG_LINK (ll, i3)
+    bitmap_set_bit (links_regset, ll->regno);
+  FOR_EACH_LOG_LINK (ll, i2)
+    bitmap_set_bit (links_regset, ll->regno);
+  if (i1)
+   FOR_EACH_LOG_LINK (ll, i1)
+      bitmap_set_bit (links_regset, ll->regno);
+  if (i0)
+    FOR_EACH_LOG_LINK (ll, i0)
+      bitmap_set_bit (links_regset, ll->regno);
+
   /* First check for one important special case that the code below will
      not handle.  Namely, the case where I1 is zero, I2 is a PARALLEL
      and I3 is a SET whose SET_SRC is a SET_DEST in I2.  In that case,
@@ -4051,6 +4124,33 @@ try_combine (rtx_insn *i3, rtx_insn *i2,
       return 0;
     }
 
+  auto_bitmap new_regs_in_i2;
+  if (newi2pat)
+    {
+      /* We need to discover situations where we introduce a use of a
+	 register into I2, where none of the existing LOG_LINKS contain
+	 a reference to it.  This can happen if previously I3 referenced
+	 the reg, and there is an additional use between I2 and I3.  We
+	 must remove the LOG_LINKS entry from that additional use and
+	 distribute it along with our own ones.  */
+	note_uses (&newi2pat, record_used_regs, (bitmap)new_regs_in_i2);
+	bitmap_and_compl_into (new_regs_in_i2, i2_regset);
+	bitmap_and_compl_into (new_regs_in_i2, links_regset);
+
+	/* Here, we first look for situations where a hard register use
+	   moved, and just give up.  This should happen approximately
+	   never, and it's not worth it to deal with possibilities like
+	   multi-word registers.  Later, when fixing up LOG_LINKS, we
+	   deal with the case where a pseudo use moved.  */
+	if (prev_nonnote_insn (i3) != i2)
+	  for (unsigned r = 0; r < FIRST_PSEUDO_REGISTER; r++)
+	    if (bitmap_bit_p (new_regs_in_i2, r))
+	      {
+		undo_all ();
+		return 0;
+	    }
+    }
+
   if (MAY_HAVE_DEBUG_INSNS)
     {
       struct undo *undo;
@@ -4494,6 +4594,45 @@ try_combine (rtx_insn *i3, rtx_insn *i2,
 			    NULL_RTX, NULL_RTX, NULL_RTX);
       }
 
+    if (newi2pat)
+      {
+	bitmap_iterator iter;
+	unsigned int i;
+
+	/* See comments above where we calculate the bitmap.  */
+	EXECUTE_IF_SET_IN_BITMAP ((bitmap)new_regs_in_i2,
+				  LAST_VIRTUAL_REGISTER, i, iter)
+	  {
+	    rtx reg = regno_reg_rtx[i];
+	    rtx_insn *other;
+	    for (other = NEXT_INSN (i2); other != i3; other = NEXT_INSN (other))
+	      if (NONDEBUG_INSN_P (other)
+		  && (reg_overlap_mentioned_p (reg, PATTERN (other))
+		      || (CALL_P (other) && find_reg_fusage (other, USE, reg))))
+		{
+		  if (dump_file)
+		    fprintf (dump_file,
+			     "found extra use of reg %d at insn %d\n", i,
+			     INSN_UID (other));
+		  insn_link **plink;
+		  for (plink = &LOG_LINKS (other);
+		       *plink;
+		       plink = &(*plink)->next)
+		    {
+		      insn_link *link = *plink;
+		      if (link->regno == i)
+			{
+			  *plink = link->next;
+			  link->next = i3links;
+			  i3links = link;
+			  break;
+			}
+		    }
+		  break;
+		}
+	  }
+      }
+
     distribute_links (i3links);
     distribute_links (i2links);
     distribute_links (i1links);
Index: gcc/testsuite/gcc.dg/torture/pr79910.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr79910.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr79910.c	(working copy)
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fweb" } */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+int a;
+
+static __attribute__ ((noinline, noclone)) u64
+foo (u8 p1, u32 p2)
+{
+  u64 b = a <= 0;
+  p2 = 4;
+  b >>= a == 0;
+  p1 %= 0xfffffffff;
+  p2 >>= b & 31;
+  p1 += b;
+  p2 <<= 31;
+  return p1 + p2 + b;
+}
+
+int
+main (void)
+{
+  u64 x = foo (0, 1);
+  if (x != 0)
+    __builtin_abort ();
+  return 0;
+}

Reply via email to