Denis Chertykov wrote:
> 2011/4/14 Georg-Johann Lay <[email protected]>:
>> Denis Chertykov wrote:
>>> 2011/4/14 Georg-Johann Lay <[email protected]>:
>>>> The "rotl<mode>3" expanders (mode \in {HI,SI,DI}) violate the
>>>> expander synopsis by using 4 operands instead of 3. This runs into
>>>> an ICE at the top of optabs.c:maybe_gen_insn.
>>>>
>>>> The right way to do this is to use match_dup, not match_operand. So
>>>> the fix is obvious.
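>>>>
>>>> To illustrate, the clobber becomes a duplicate of operand 3 instead
>>>> of a fourth, independent match_operand (sketch):
>>>>
>>>>   [(parallel [(set (match_operand:HIDI 0 "register_operand" "")
>>>>                    (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
>>>>                                 (match_operand:VOID 2 "const_int_operand" "")))
>>>>               (clobber (match_dup 3))])]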
>>>>
>>>> Regenerated avr-gcc and ran it against the avr testsuite:
>>>>
>>>> gcc.target/avr/torture/pr41885.c generates these patterns
>>>>
>>>> Johann
>>>>
>>>> 2011-04-14 Georg-Johann Lay <[email protected]>
>>>>
>>>> * config/avr/avr.md ("rotlhi3", "rotlsi3", "rotldi3"): Fix
>>>> expanders operand 3 from match_operand to match_dup.
>>> Maybe it would be better to use (clobber (match_scratch ...)) here
>>> and in `*rotw<mode>' and `*rotb<mode>'.
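>>>
>>> i.e. something along the lines of (sketch):
>>>
>>>   (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))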
>> These are splitters that might split before reload, producing strange,
>> non-matching insns like
>> (set (scratch:QI) ...
>> or already crashing in avr_rotate_bytes because the split condition reads
>> "&& (reload_completed || <MODE>mode == DImode)"
>
> Generally I agree with you; changing match_operand to match_dup is easy.
I already submitted the easy patch to avoid the ICE.
> But IMHO the right way is:
> - the splitters and avr_rotate_bytes are wrong and must be fixed too.
> - operands[3] is a scratch register and the right way is to use match_scratch.
>
> I can approve the patch.
>
> Denis.
Ok, here is the right-way patch.

The expander generates a SCRATCH instead of a pseudo, and in the case
of a pre-reload split the SCRATCH is replaced with a pseudo, because
it is not known whether or not the scratch will actually be needed.

avr_rotate_bytes now skips operations on a non-REG 'scratch'.
Furthermore, I added an assertion that ensures that 'scratch' is
actually a REG when it is needed.
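
In the split bodies of *rotw<mode> and *rotb<mode> that boils down to
essentially the following (see the hunks below; *rotb<mode> uses
QImode directly):

   if (SCRATCH == GET_CODE (operands[3])
       && !reload_completed)
     {
       /* Pre-reload split: turn the SCRATCH placeholder into a real
          pseudo.  After reload, operands[3] is either a hard REG or,
          if the allocator picked the scratch-less alternative, still
          a SCRATCH, which avr_rotate_bytes now tolerates.  */
       operands[3] = gen_reg_rtx (GET_MODE (operands[3]));
     }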

Besides that, I fixed the indentation. I know it is not optimal to fix
indentation alongside functional changes... did it anyway :-)

Finally, I exposed alternative #3 of the insns to the register
allocator, because it is not possible to distinguish between
overlapping and non-overlapping regs, and #3 does not need a scratch.
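
Concretely, the only change needed for that is in the output
constraint, dropping the '#' that hid the third alternative from the
register allocator:

   "=r,r,#&r"  -->  "=r,r,&r"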

Ran the C testsuite with no regressions.
Johann
2011-04-15 Georg-Johann Lay <[email protected]>
* config/avr/avr.md ("rotl<mode>3"): Use SCRATCH instead
of REG in operand 3. Fix indentation. Unquote C snippet.
("*rotw<mode>", "*rotb<mode>"): Ditto. Replace scratch
with pseudo for pre-reload splits. Use const_int_operand for
operand 2. Expose alternative 3 to register allocator.
* config/avr/avr.c (avr_rotate_bytes): Handle SCRATCH in
operands[3]. Use GNU indentation style.
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 172493)
+++ config/avr/avr.md (working copy)
@@ -1519,22 +1519,22 @@ (define_mode_attr rotsmode [(DI "QI") (S
(define_expand "rotl<mode>3"
[(parallel [(set (match_operand:HIDI 0 "register_operand" "")
- (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
- (match_operand:VOID 2 "const_int_operand" "")))
- (clobber (match_dup 3))])]
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
+ (match_operand:VOID 2 "const_int_operand" "")))
+ (clobber (match_dup 3))])]
""
- "
-{
- if (CONST_INT_P (operands[2]) && 0 == (INTVAL (operands[2]) % 8))
{
- if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
- operands[3] = gen_reg_rtx (<rotsmode>mode);
- else
- operands[3] = gen_reg_rtx (QImode);
- }
- else
- FAIL;
-}")
+ if (CONST_INT_P (operands[2])
+ && 0 == INTVAL (operands[2]) % 8)
+ {
+ if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
+ operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+ else
+ operands[3] = gen_rtx_SCRATCH (QImode);
+ }
+ else
+ FAIL;
+ })
;; Overlapping non-HImode registers often (but not always) need a scratch.
@@ -1545,35 +1545,49 @@ (define_expand "rotl<mode>3"
; Split word aligned rotates using scratch that is mode dependent.
(define_insn_and_split "*rotw<mode>"
- [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
- (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
- (match_operand 2 "immediate_operand" "n,n,n")))
- (clobber (match_operand:<rotsmode> 3 "register_operand" "=<rotx>" ))]
- "(CONST_INT_P (operands[2]) &&
- (0 == (INTVAL (operands[2]) % 16) && AVR_HAVE_MOVW))"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "const_int_operand" "n,n,n")))
+ (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))]
+ "AVR_HAVE_MOVW
+ && 0 == INTVAL (operands[2]) % 16"
"#"
"&& (reload_completed || <MODE>mode == DImode)"
[(const_int 0)]
- "avr_rotate_bytes (operands);
- DONE;"
-)
+ {
+ /* Split happens in .split1: Cook up a pseudo */
+ if (SCRATCH == GET_CODE (operands[3])
+ && !reload_completed)
+ {
+ operands[3] = gen_reg_rtx (GET_MODE (operands[3]));
+ }
+ avr_rotate_bytes (operands);
+ DONE;
+ })
; Split byte aligned rotates using scratch that is always QI mode.
(define_insn_and_split "*rotb<mode>"
- [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
- (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
- (match_operand 2 "immediate_operand" "n,n,n")))
- (clobber (match_operand:QI 3 "register_operand" "=<rotx>" ))]
- "(CONST_INT_P (operands[2]) &&
- (8 == (INTVAL (operands[2]) % 16)
- || (!AVR_HAVE_MOVW && 0 == (INTVAL (operands[2]) % 16))))"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "const_int_operand" "n,n,n")))
+ (clobber (match_scratch:QI 3 "=<rotx>" ))]
+ "8 == INTVAL (operands[2]) % 16
+ || (!AVR_HAVE_MOVW
+ && 0 == INTVAL (operands[2]) % 16)"
"#"
"&& (reload_completed || <MODE>mode == DImode)"
[(const_int 0)]
- "avr_rotate_bytes (operands);
- DONE;"
-)
+ {
+ /* Split happens in .split1: Cook up a pseudo */
+ if (SCRATCH == GET_CODE (operands[3])
+ && !reload_completed)
+ {
+ operands[3] = gen_reg_rtx (QImode);
+ }
+ avr_rotate_bytes (operands);
+ DONE;
+ })
;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c (revision 172493)
+++ config/avr/avr.c (working copy)
@@ -4481,141 +4481,147 @@ lshrsi3_out (rtx insn, rtx operands[], i
bool
avr_rotate_bytes (rtx operands[])
{
- int i, j;
- enum machine_mode mode = GET_MODE (operands[0]);
- bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
- bool same_reg = rtx_equal_p (operands[0], operands[1]);
- int num = INTVAL (operands[2]);
- rtx scratch = operands[3];
- /* Work out if byte or word move is needed. Odd byte rotates need QImode.
- Word move if no scratch is needed, otherwise use size of scratch. */
- enum machine_mode move_mode = QImode;
- int move_size, offset, size;
-
- if (num & 0xf)
- move_mode = QImode;
- else if ((mode == SImode && !same_reg) || !overlapped)
- move_mode = HImode;
- else
- move_mode = GET_MODE (scratch);
-
- /* Force DI rotate to use QI moves since other DI moves are currently split
- into QI moves so forward propagation works better. */
- if (mode == DImode)
- move_mode = QImode;
- /* Make scratch smaller if needed. */
- if (GET_MODE (scratch) == HImode && move_mode == QImode)
- scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
-
- move_size = GET_MODE_SIZE (move_mode);
- /* Number of bytes/words to rotate. */
- offset = (num >> 3) / move_size;
- /* Number of moves needed. */
- size = GET_MODE_SIZE (mode) / move_size;
- /* Himode byte swap is special case to avoid a scratch register. */
- if (mode == HImode && same_reg)
- {
- /* HImode byte swap, using xor. This is as quick as using scratch. */
- rtx src, dst;
- src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
- dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
- if (!rtx_equal_p (dst, src))
- {
- emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
- emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
- emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
- }
- }
- else
- {
+ int i, j;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
+ bool same_reg = rtx_equal_p (operands[0], operands[1]);
+ int num = INTVAL (operands[2]);
+ rtx scratch = operands[3];
+ /* Work out if byte or word move is needed. Odd byte rotates need QImode.
+ Word move if no scratch is needed, otherwise use size of scratch. */
+ enum machine_mode move_mode = QImode;
+ int move_size, offset, size;
+
+ if (num & 0xf)
+ move_mode = QImode;
+ else if ((mode == SImode && !same_reg) || !overlapped)
+ move_mode = HImode;
+ else
+ move_mode = GET_MODE (scratch);
+
+ /* Force DI rotate to use QI moves since other DI moves are currently split
+ into QI moves so forward propagation works better. */
+ if (mode == DImode)
+ move_mode = QImode;
+ /* Make scratch smaller if needed. */
+ if (GET_MODE (scratch) == HImode
+ && move_mode == QImode
+ && REG_P (scratch))
+ {
+ scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+ }
+
+ move_size = GET_MODE_SIZE (move_mode);
+ /* Number of bytes/words to rotate. */
+ offset = (num >> 3) / move_size;
+ /* Number of moves needed. */
+ size = GET_MODE_SIZE (mode) / move_size;
+ /* HImode byte swap is a special case to avoid a scratch register. */
+ if (mode == HImode && same_reg)
+ {
+ /* HImode byte swap, using xor. This is as quick as using scratch. */
+ rtx src, dst;
+ src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+ dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+ if (!rtx_equal_p (dst, src))
+ {
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ }
+ }
+ else
+ {
#define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */
- /* Create linked list of moves to determine move order. */
- struct {
- rtx src, dst;
- int links;
- } move[MAX_SIZE + 8];
- int blocked, moves;
-
- gcc_assert (size <= MAX_SIZE);
- /* Generate list of subreg moves. */
- for (i = 0; i < size; i++)
- {
- int from = i;
- int to = (from + offset) % size;
- move[i].src = simplify_gen_subreg (move_mode, operands[1],
- mode, from * move_size);
- move[i].dst = simplify_gen_subreg (move_mode, operands[0],
- mode, to * move_size);
- move[i].links = -1;
- }
- /* Mark dependence where a dst of one move is the src of another move.
- The first move is a conflict as it must wait until second is
- performed. We ignore moves to self - we catch this later. */
- if (overlapped)
- for (i = 0; i < size; i++)
- if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
- for (j = 0; j < size; j++)
- if (j != i && rtx_equal_p (move[j].src, move[i].dst))
- {
- /* The dst of move i is the src of move j. */
- move[i].links = j;
- break;
- }
-
- blocked = -1;
- moves = 0;
- /* Go through move list and perform non-conflicting moves. As each
- non-overlapping move is made, it may remove other conflicts
- so the process is repeated until no conflicts remain. */
- do
- {
- blocked = -1;
- moves = 0;
- /* Emit move where dst is not also a src or we have used that
- src already. */
- for (i = 0; i < size; i++)
- if (move[i].src != NULL_RTX)
- {
- if (move[i].links == -1
- || move[move[i].links].src == NULL_RTX)
- {
- moves++;
- /* Ignore NOP moves to self. */
- if (!rtx_equal_p (move[i].dst, move[i].src))
- emit_move_insn (move[i].dst, move[i].src);
-
- /* Remove conflict from list. */
- move[i].src = NULL_RTX;
- }
- else
- blocked = i;
- }
-
- /* Check for deadlock. This is when no moves occurred and we have
- at least one blocked move. */
- if (moves == 0 && blocked != -1)
- {
- /* Need to use scratch register to break deadlock.
- Add move to put dst of blocked move into scratch.
- When this move occurs, it will break chain deadlock.
- The scratch register is substituted for real move. */
-
- move[size].src = move[blocked].dst;
- move[size].dst = scratch;
- /* Scratch move is never blocked. */
- move[size].links = -1;
- /* Make sure we have valid link. */
- gcc_assert (move[blocked].links != -1);
- /* Replace src of blocking move with scratch reg. */
- move[move[blocked].links].src = scratch;
- /* Make dependent on scratch move occuring. */
- move[blocked].links = size;
- size=size+1;
- }
- }
- while (blocked != -1);
- }
- return true;
+ /* Create linked list of moves to determine move order. */
+ struct {
+ rtx src, dst;
+ int links;
+ } move[MAX_SIZE + 8];
+ int blocked, moves;
+
+ gcc_assert (size <= MAX_SIZE);
+ /* Generate list of subreg moves. */
+ for (i = 0; i < size; i++)
+ {
+ int from = i;
+ int to = (from + offset) % size;
+ move[i].src = simplify_gen_subreg (move_mode, operands[1],
+ mode, from * move_size);
+ move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+ mode, to * move_size);
+ move[i].links = -1;
+ }
+ /* Mark dependence where a dst of one move is the src of another move.
+ The first move is a conflict as it must wait until second is
+ performed. We ignore moves to self - we catch this later. */
+ if (overlapped)
+ for (i = 0; i < size; i++)
+ if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+ for (j = 0; j < size; j++)
+ if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+ {
+ /* The dst of move i is the src of move j. */
+ move[i].links = j;
+ break;
+ }
+
+ blocked = -1;
+ moves = 0;
+ /* Go through move list and perform non-conflicting moves. As each
+ non-overlapping move is made, it may remove other conflicts
+ so the process is repeated until no conflicts remain. */
+ do
+ {
+ blocked = -1;
+ moves = 0;
+ /* Emit move where dst is not also a src or we have used that
+ src already. */
+ for (i = 0; i < size; i++)
+ if (move[i].src != NULL_RTX)
+ {
+ if (move[i].links == -1
+ || move[move[i].links].src == NULL_RTX)
+ {
+ moves++;
+ /* Ignore NOP moves to self. */
+ if (!rtx_equal_p (move[i].dst, move[i].src))
+ emit_move_insn (move[i].dst, move[i].src);
+
+ /* Remove conflict from list. */
+ move[i].src = NULL_RTX;
+ }
+ else
+ blocked = i;
+ }
+
+ /* Check for deadlock. This is when no moves occurred and we have
+ at least one blocked move. */
+ if (moves == 0 && blocked != -1)
+ {
+ /* Need to use scratch register to break deadlock.
+ Add move to put dst of blocked move into scratch.
+ When this move occurs, it will break chain deadlock.
+ The scratch register is substituted for real move. */
+
+ gcc_assert (REG_P (scratch));
+
+ move[size].src = move[blocked].dst;
+ move[size].dst = scratch;
+ /* Scratch move is never blocked. */
+ move[size].links = -1;
+ /* Make sure we have valid link. */
+ gcc_assert (move[blocked].links != -1);
+ /* Replace src of blocking move with scratch reg. */
+ move[move[blocked].links].src = scratch;
+ /* Make dependent on scratch move occurring. */
+ move[blocked].links = size;
+ size=size+1;
+ }
+ }
+ while (blocked != -1);
+ }
+ return true;
}
/* Modifies the length assigned to instruction INSN