Denis Chertykov wrote:
> 2011/4/14 Georg-Johann Lay <a...@gjlay.de>:
>> Denis Chertykov wrote:
>>> 2011/4/14 Georg-Johann Lay <a...@gjlay.de>:
>>>> The "rotl<mode>3" expanders (mode \in {HI,SI,DI}) violates synopsis by
>>>> using 4 operands instead of 3. This runs in ICE in top of
>>>> optabs.c:maybe_gen_insn.
>>>>
>>>> The right way to do this is to use match_dup, not match_operand. So
>>>> the fix is obvious.
>>>>
>>>> Rebuilt avr-gcc and ran it against the avr testsuite:
>>>>
>>>> gcc.target/avr/torture/pr41885.c generates these patterns
>>>>
>>>> Johann
>>>>
>>>> 2011-04-14  Georg-Johann Lay  <a...@gjlay.de>
>>>>
>>>>        * config/avr/avr.md ("rotlhi3", "rotlsi3", "rotldi3"): Change
>>>>        operand 3 from match_operand to match_dup.
>>> Maybe it would be better to use (clobber (match_scratch ...)) here
>>> and in `*rotw<mode>' and `*rotb<mode>'.
>> These are splitters that might split before reload, producing strange,
>> non-matching insns like
>>  (set (scratch:QI) ...
>> or crashing already in avr_rotate_bytes because the split condition reads
>> "&& (reload_completed || <MODE>mode == DImode)"
> 
> Generally I agree with you; changing match_operand to match_dup is easy.

I already submitted the easy patch to avoid the ICE.

> But IMHO the right way is:
>  - the splitters and avr_rotate_bytes are wrong and must be fixed too.
>  - operands[3] is a scratch register, and the right way is to use match_scratch.
> 
> I can approve the patch.
> 
> Denis.

Ok, here is the right-way patch.

The expander now generates a SCRATCH instead of a pseudo, and in the
case of a pre-reload split the SCRATCH is replaced with a pseudo,
because at expand time it is not known whether the scratch will
actually be needed.
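
For illustration, a rotate of an HImode value by 8 now expands to RTL
along these lines (a sketch; the pseudo register numbers are invented):

  (parallel [(set (reg:HI 48)
                  (rotate:HI (reg:HI 49)
                             (const_int 8)))
             (clobber (scratch:QI))])

If the insn is split before reload (the split condition only allows
that for DImode), the SCRATCH is swapped for a fresh pseudo at split
time.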

avr_rotate_bytes now skips operations on non-REG 'scratch'.
Furthermore, I added an assertion that ensures that 'scratch' is
actually a REG when it is needed.
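
For reference, here is the kind of source that exercises these
patterns and ends up in avr_rotate_bytes (my own reduction, similar in
spirit to pr41885.c; the function names are made up):

  unsigned short rot16 (unsigned short x)
  {
    /* Byte-aligned rotate, should expand via "rotlhi3".  */
    return (x << 8) | (x >> 8);
  }

  unsigned long rot32 (unsigned long x)
  {
    /* Word-aligned rotate, should expand via "rotlsi3".  */
    return (x << 16) | (x >> 16);
  }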

Besides that, I fixed indentation. I know it is not optimal to fix
indentation alongside functionality changes... I did it anyway :-)

Finally, I exposed alternative #3 of the insns to the register
allocator, because it is not possible to tell beforehand whether the
regs will overlap, and #3 does not need a scratch.
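
To illustrate the no-scratch case: with non-overlapping input and
output, a word-aligned SImode rotate by 16 should decompose into two
plain moves, roughly (assuming a device with MOVW; the register
choice is invented):

  movw r24,r30  ; lower word of dst = upper word of src
  movw r26,r28  ; upper word of dst = lower word of src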

Ran the C testsuite with no regressions.

Johann

2011-04-15  Georg-Johann Lay  <a...@gjlay.de>

        * config/avr/avr.md ("rotl<mode>3"): Use SCRATCH instead
        of REG in operand 3. Fix indentation. Unquote C snippet.
        ("*rotw<mode>","*rotbw<mode>"): Ditto. Replace scratch
        with pseudo for pre-reload splits. Use const_int_operand for
        operand 2. Expose alternative 3 to register allocator.
        * config/avr/avr.c (avr_rotate_bytes): Handle SCRATCH in
        operands[3]. Use GNU indentation style.

Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 172493)
+++ config/avr/avr.md	(working copy)
@@ -1519,22 +1519,22 @@ (define_mode_attr rotsmode [(DI "QI") (S
 
 (define_expand "rotl<mode>3"
   [(parallel [(set (match_operand:HIDI 0 "register_operand" "")
-		   (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
-				(match_operand:VOID 2 "const_int_operand" "")))
-		(clobber (match_dup 3))])]
+                   (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
+                                (match_operand:VOID 2 "const_int_operand" "")))
+              (clobber (match_dup 3))])]
   ""
-  "
-{
-  if (CONST_INT_P (operands[2]) && 0 == (INTVAL (operands[2]) % 8))
   {
-  if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
-    operands[3] = gen_reg_rtx (<rotsmode>mode);
-  else
-    operands[3] = gen_reg_rtx (QImode);
-  }
-  else
-    FAIL;
-}")
+    if (CONST_INT_P (operands[2])
+        && 0 == INTVAL (operands[2]) % 8)
+      {
+        if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
+          operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+        else
+          operands[3] = gen_rtx_SCRATCH (QImode);
+      }
+    else
+      FAIL;
+  })
 
 
 ;; Overlapping non-HImode registers often (but not always) need a scratch.
@@ -1545,35 +1545,49 @@ (define_expand "rotl<mode>3"
 
 ; Split word aligned rotates using scratch that is mode dependent.
 (define_insn_and_split "*rotw<mode>"
-  [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
-	(rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
-		     (match_operand 2 "immediate_operand" "n,n,n")))
-   (clobber (match_operand:<rotsmode> 3 "register_operand"  "=<rotx>" ))]
-  "(CONST_INT_P (operands[2]) &&
-     (0 == (INTVAL (operands[2]) % 16) && AVR_HAVE_MOVW))"
+  [(set (match_operand:HIDI 0 "register_operand"              "=r,r,&r")
+        (rotate:HIDI (match_operand:HIDI 1 "register_operand"  "0,r,r")
+                     (match_operand 2 "const_int_operand"      "n,n,n")))
+   (clobber (match_scratch:<rotsmode> 3                       "=<rotx>"))]
+  "AVR_HAVE_MOVW
+   && 0 == INTVAL (operands[2]) % 16"
   "#"
   "&& (reload_completed || <MODE>mode == DImode)"
   [(const_int 0)]
-  "avr_rotate_bytes (operands);
-  DONE;"
-)
+  {
+    /* Split happens in .split1: cook up a pseudo.  */
+    if (SCRATCH == GET_CODE (operands[3])
+        && !reload_completed)
+      {
+        operands[3] = gen_reg_rtx (GET_MODE (operands[3]));
+      }
+    avr_rotate_bytes (operands);
+    DONE;
+  })
 
 
 ; Split byte aligned rotates using scratch that is always QI mode.
 (define_insn_and_split "*rotb<mode>"
-  [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
-	(rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
-		     (match_operand 2 "immediate_operand" "n,n,n")))
-   (clobber (match_operand:QI 3 "register_operand" "=<rotx>" ))]
-  "(CONST_INT_P (operands[2]) &&
-     (8 == (INTVAL (operands[2]) % 16)
-     	|| (!AVR_HAVE_MOVW && 0 == (INTVAL (operands[2]) % 16))))"
+  [(set (match_operand:HIDI 0 "register_operand"              "=r,r,&r")
+        (rotate:HIDI (match_operand:HIDI 1 "register_operand"  "0,r,r")
+                     (match_operand 2 "const_int_operand"      "n,n,n")))
+   (clobber (match_scratch:QI 3                               "=<rotx>" ))]
+  "8 == INTVAL (operands[2]) % 16
+   || (!AVR_HAVE_MOVW
+       && 0 == INTVAL (operands[2]) % 16)"
   "#"
   "&& (reload_completed || <MODE>mode == DImode)"
   [(const_int 0)]
-  "avr_rotate_bytes (operands);
-  DONE;"
-)
+  {
+    /* Split happens in .split1: cook up a pseudo.  */
+    if (SCRATCH == GET_CODE (operands[3])
+        && !reload_completed)
+      {
+        operands[3] = gen_reg_rtx (QImode);
+      }
+    avr_rotate_bytes (operands);
+    DONE;
+  })
 
 
 ;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 172493)
+++ config/avr/avr.c	(working copy)
@@ -4481,141 +4481,147 @@ lshrsi3_out (rtx insn, rtx operands[], i
 bool
 avr_rotate_bytes (rtx operands[])
 {
-    int i, j;
-    enum machine_mode mode = GET_MODE (operands[0]);
-    bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
-    bool same_reg = rtx_equal_p (operands[0], operands[1]);
-    int num = INTVAL (operands[2]);
-    rtx scratch = operands[3];
-    /* Work out if byte or word move is needed.  Odd byte rotates need QImode.
-       Word move if no scratch is needed, otherwise use size of scratch.  */
-    enum machine_mode move_mode = QImode;
-    int move_size, offset, size;
-
-    if (num & 0xf)
-      move_mode = QImode;
-    else if ((mode == SImode && !same_reg) || !overlapped)
-      move_mode = HImode;
-    else
-      move_mode = GET_MODE (scratch);
-
-    /* Force DI rotate to use QI moves since other DI moves are currently split
-       into QI moves so forward propagation works better.  */
-    if (mode == DImode)
-      move_mode = QImode;
-    /* Make scratch smaller if needed.  */
-    if (GET_MODE (scratch) == HImode && move_mode == QImode)
-      scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); 
-
-    move_size = GET_MODE_SIZE (move_mode);
-    /* Number of bytes/words to rotate.  */
-    offset = (num  >> 3) / move_size;
-    /* Number of moves needed.  */
-    size = GET_MODE_SIZE (mode) / move_size;
-    /* Himode byte swap is special case to avoid a scratch register.  */
-    if (mode == HImode && same_reg)
-      {
-	/* HImode byte swap, using xor.  This is as quick as using scratch.  */
-	rtx src, dst;
-	src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
-	dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
-	if (!rtx_equal_p (dst, src))
-	  {
-	     emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
-	     emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
-	     emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
-	  }
-      }    
-    else  
-      {
+  int i, j;
+  enum machine_mode mode = GET_MODE (operands[0]);
+  bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
+  bool same_reg = rtx_equal_p (operands[0], operands[1]);
+  int num = INTVAL (operands[2]);
+  rtx scratch = operands[3];
+  /* Work out if byte or word move is needed.  Odd byte rotates need QImode.
+     Word move if no scratch is needed, otherwise use size of scratch.  */
+  enum machine_mode move_mode = QImode;
+  int move_size, offset, size;
+
+  if (num & 0xf)
+    move_mode = QImode;
+  else if ((mode == SImode && !same_reg) || !overlapped)
+    move_mode = HImode;
+  else
+    move_mode = GET_MODE (scratch);
+
+  /* Force DI rotate to use QI moves since other DI moves are currently split
+     into QI moves so forward propagation works better.  */
+  if (mode == DImode)
+    move_mode = QImode;
+  /* Make scratch smaller if needed.  */
+  if (GET_MODE (scratch) == HImode
+      && move_mode == QImode
+      && REG_P (scratch))
+    {
+      scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+    }
+
+  move_size = GET_MODE_SIZE (move_mode);
+  /* Number of bytes/words to rotate.  */
+  offset = (num  >> 3) / move_size;
+  /* Number of moves needed.  */
+  size = GET_MODE_SIZE (mode) / move_size;
+  /* Himode byte swap is special case to avoid a scratch register.  */
+  if (mode == HImode && same_reg)
+    {
+      /* HImode byte swap, using xor.  This is as quick as using scratch.  */
+      rtx src, dst;
+      src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+      dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+      if (!rtx_equal_p (dst, src))
+        {
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+          emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+        }
+    }
+  else
+    {
 #define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode)  */
-	/* Create linked list of moves to determine move order.  */
-	struct {
-	  rtx src, dst;
-	  int links;
-	} move[MAX_SIZE + 8];
-	int blocked, moves;
-
-	gcc_assert (size <= MAX_SIZE);
-	/* Generate list of subreg moves.  */
-	for (i = 0; i < size; i++)
-	  {
-	    int from = i;
-	    int to = (from + offset) % size;          
-	    move[i].src = simplify_gen_subreg (move_mode, operands[1],
-						mode, from * move_size);
-	    move[i].dst = simplify_gen_subreg (move_mode, operands[0],
-						mode, to   * move_size);
-	    move[i].links = -1;
-	   }
-	/* Mark dependence where a dst of one move is the src of another move.
-	   The first move is a conflict as it must wait until second is
-	   performed.  We ignore moves to self - we catch this later.  */
-	if (overlapped)
-	  for (i = 0; i < size; i++)
-	    if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
-	      for (j = 0; j < size; j++)
-		if (j != i && rtx_equal_p (move[j].src, move[i].dst))
-		  {
-		    /* The dst of move i is the src of move j.  */
-		    move[i].links = j;
-		    break;
-		  }
-
-	blocked = -1;
-	moves = 0;
-	/* Go through move list and perform non-conflicting moves.  As each
-	   non-overlapping move is made, it may remove other conflicts
-	   so the process is repeated until no conflicts remain.  */
-	do
-	  {
-	    blocked = -1;
-	    moves = 0;
-	    /* Emit move where dst is not also a src or we have used that
-	       src already.  */
-	    for (i = 0; i < size; i++)
-	      if (move[i].src != NULL_RTX)
-		{
-		  if (move[i].links == -1
-		      || move[move[i].links].src == NULL_RTX)
-		    {
-		      moves++;
-		      /* Ignore NOP moves to self.  */
-		      if (!rtx_equal_p (move[i].dst, move[i].src))
-			emit_move_insn (move[i].dst, move[i].src);
-
-		      /* Remove  conflict from list.  */
-		      move[i].src = NULL_RTX;
-		    }
-		  else
-		    blocked = i;
-		}
-
-	    /* Check for deadlock. This is when no moves occurred and we have
-	       at least one blocked move.  */
-	    if (moves == 0 && blocked != -1)
-	      {
-		/* Need to use scratch register to break deadlock.
-		   Add move to put dst of blocked move into scratch.
-		   When this move occurs, it will break chain deadlock.
-		   The scratch register is substituted for real move.  */
-
-		move[size].src = move[blocked].dst;
-		move[size].dst =  scratch;
-		/* Scratch move is never blocked.  */
-		move[size].links = -1; 
-		/* Make sure we have valid link.  */
-		gcc_assert (move[blocked].links != -1);
-		/* Replace src of  blocking move with scratch reg.  */
-		move[move[blocked].links].src = scratch;
-		/* Make dependent on scratch move occuring.  */
-		move[blocked].links = size; 
-		size=size+1;
-	      }
-	  }
-	while (blocked != -1);
-      }
-    return true;
+      /* Create linked list of moves to determine move order.  */
+      struct {
+        rtx src, dst;
+        int links;
+      } move[MAX_SIZE + 8];
+      int blocked, moves;
+
+      gcc_assert (size <= MAX_SIZE);
+      /* Generate list of subreg moves.  */
+      for (i = 0; i < size; i++)
+        {
+          int from = i;
+          int to = (from + offset) % size;
+          move[i].src = simplify_gen_subreg (move_mode, operands[1],
+                                             mode, from * move_size);
+          move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+                                             mode, to   * move_size);
+          move[i].links = -1;
+        }
+      /* Mark dependence where a dst of one move is the src of another move.
+         The first move is a conflict as it must wait until second is
+         performed.  We ignore moves to self - we catch this later.  */
+      if (overlapped)
+        for (i = 0; i < size; i++)
+          if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+            for (j = 0; j < size; j++)
+              if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+                {
+                  /* The dst of move i is the src of move j.  */
+                  move[i].links = j;
+                  break;
+                }
+
+      blocked = -1;
+      moves = 0;
+      /* Go through move list and perform non-conflicting moves.  As each
+         non-overlapping move is made, it may remove other conflicts
+         so the process is repeated until no conflicts remain.  */
+      do
+        {
+          blocked = -1;
+          moves = 0;
+          /* Emit move where dst is not also a src or we have used that
+             src already.  */
+          for (i = 0; i < size; i++)
+            if (move[i].src != NULL_RTX)
+              {
+                if (move[i].links == -1
+                    || move[move[i].links].src == NULL_RTX)
+                  {
+                    moves++;
+                    /* Ignore NOP moves to self.  */
+                    if (!rtx_equal_p (move[i].dst, move[i].src))
+                      emit_move_insn (move[i].dst, move[i].src);
+
+                    /* Remove conflict from list.  */
+                    move[i].src = NULL_RTX;
+                  }
+                else
+                  blocked = i;
+              }
+
+          /* Check for deadlock. This is when no moves occurred and we have
+             at least one blocked move.  */
+          if (moves == 0 && blocked != -1)
+            {
+              /* Need to use scratch register to break deadlock.
+                 Add move to put dst of blocked move into scratch.
+                 When this move occurs, it will break chain deadlock.
+                 The scratch register is substituted for real move.  */
+
+              gcc_assert (REG_P (scratch));
+
+              move[size].src = move[blocked].dst;
+              move[size].dst = scratch;
+              /* Scratch move is never blocked.  */
+              move[size].links = -1;
+              /* Make sure we have a valid link.  */
+              gcc_assert (move[blocked].links != -1);
+              /* Replace src of blocking move with scratch reg.  */
+              move[move[blocked].links].src = scratch;
+              /* Make dependent on scratch move occurring.  */
+              move[blocked].links = size;
+              size = size + 1;
+            }
+        }
+      while (blocked != -1);
+    }
+  return true;
 }
 
 /* Modifies the length assigned to instruction INSN
