This patch makes some tweaks to addhi3, such as adding a QI scratch register.
The original *addhi3 insn is still there and is located prior to the new
addhi3_clobber insn because addhi3 is special to reload (thanks Danis for this
note), so there is a version with and a version without a scratch register.
The patch passes without regressions.
Ok for trunk?
PR target/50447
* config/avr/avr.md (cc): New alternative out_plus_noclobber.
(adjust_len): Ditto.
(addhi3): Don't pipe through short; use gen_int_mode instead.
Prior to reload, expand to gen_addhi3_clobber.
(*addhi3): Use avr_out_plus_noclobber if applicable, use
out_plus_noclobber in cc and adjust_len attribute.
(addhi3_clobber): 2 new RTL peepholes.
(addhi3_clobber): New insn.
* config/avr/avr-protos.h (avr_out_plus_noclobber): New prototype.
* config/avr/avr.c (avr_out_plus_noclobber): New function.
(notice_update_cc): Handle CC_OUT_PLUS_NOCLOBBER.
(avr_out_plus_1): Tweak if only MSB is +/-1 and other bytes are 0.
Set cc0 to set_zn for adiw on 16-bit values.
(adjust_insn_length): Handle ADJUST_LEN_OUT_PLUS_NOCLOBBER.
(expand_epilogue): No need to add 0 to frame_pointer_rtx.
Johann
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 180104)
+++ config/avr/avr.md (working copy)
@@ -78,7 +78,7 @@ (define_c_enum "unspecv"
;; Condition code settings.
(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber,
- out_plus"
+ out_plus, out_plus_noclobber"
(const_string "none"))
(define_attr "type" "branch,branch1,arith,xcall"
@@ -125,7 +125,8 @@ (define_attr "length" ""
;; Otherwise do special processing depending on the attribute.
(define_attr "adjust_len"
- "out_bitop, out_plus, addto_sp, tsthi, tstsi, compare, call,
+ "out_bitop, out_plus, out_plus_noclobber, addto_sp,
+ tsthi, tstsi, compare, call,
mov8, mov16, mov32, reload_in16, reload_in32,
ashlqi, ashrqi, lshrqi,
ashlhi, ashrhi, lshrhi,
@@ -759,14 +760,23 @@ (define_expand "addhi3"
(plus:HI (match_operand:HI 1 "register_operand" "")
(match_operand:HI 2 "nonmemory_operand" "")))]
""
- "
-{
- if (GET_CODE (operands[2]) == CONST_INT)
- {
- short tmp = INTVAL (operands[2]);
- operands[2] = GEN_INT(tmp);
- }
-}")
+ {
+ if (CONST_INT_P (operands[2]))
+ {
+ operands[2] = gen_int_mode (INTVAL (operands[2]), HImode);
+
+ if (!reload_completed
+ && !reload_in_progress
+ && !stack_register_operand (operands[0], HImode)
+ && !stack_register_operand (operands[1], HImode)
+ && !d_register_operand (operands[0], HImode)
+ && !d_register_operand (operands[1], HImode))
+ {
+ emit_insn (gen_addhi3_clobber (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ }
+ })
(define_insn "*addhi3_zero_extend"
@@ -803,20 +813,77 @@ (define_insn "*addhi3_sp_R"
(set_attr "adjust_len" "addto_sp")])
(define_insn "*addhi3"
- [(set (match_operand:HI 0 "register_operand" "=r,!w,!w,d,r,r")
- (plus:HI
- (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0")
- (match_operand:HI 2 "nonmemory_operand" "r,I,J,i,P,N")))]
+ [(set (match_operand:HI 0 "register_operand" "=r,d,d")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,s,n")))]
""
- "@
- add %A0,%A2\;adc %B0,%B2
- adiw %A0,%2
- sbiw %A0,%n2
- subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2))
- sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__
- sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__"
- [(set_attr "length" "2,1,1,2,3,3")
- (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")])
+ {
+ static const char * const asm_code[] =
+ {
+ "add %A0,%A2\;adc %B0,%B2",
+ "subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2))",
+ ""
+ };
+
+ if (*asm_code[which_alternative])
+ return asm_code[which_alternative];
+
+ return avr_out_plus_noclobber (operands, NULL, NULL);
+ }
+ [(set_attr "length" "2,2,2")
+ (set_attr "adjust_len" "*,*,out_plus_noclobber")
+ (set_attr "cc" "set_n,set_czn,out_plus_noclobber")])
+
+;; Adding a constant to NO_LD_REGS might have led to a reload of
+;; that constant to LD_REGS. We don't add a scratch to *addhi3
+;; itself because that insn is special to reload.
+
+(define_peephole2 ; *addhi3_clobber
+ [(set (match_operand:HI 0 "d_register_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_operand:HI 2 "l_register_operand" "")
+ (plus:HI (match_dup 2)
+ (match_dup 0)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (match_dup 2)
+ (plus:HI (match_dup 2)
+ (match_dup 1)))
+ (clobber (match_dup 3))])]
+ {
+ operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0);
+ })
+
+;; Same, but with reload to NO_LD_REGS
+;; Combine *reload_inhi with *addhi3
+
+(define_peephole2 ; *addhi3_clobber
+ [(parallel [(set (match_operand:HI 0 "l_register_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (clobber (match_operand:QI 2 "d_register_operand" ""))])
+ (set (match_operand:HI 3 "l_register_operand" "")
+ (plus:HI (match_dup 3)
+ (match_dup 0)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (match_dup 3)
+ (plus:HI (match_dup 3)
+ (match_dup 1)))
+ (clobber (match_dup 2))])])
+
+(define_insn "addhi3_clobber"
+ [(set (match_operand:HI 0 "register_operand" "=d,l")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0")
+ (match_operand:HI 2 "const_int_operand" "n,n")))
+ (clobber (match_scratch:QI 3 "=X,&d"))]
+ ""
+ {
+ gcc_assert (REGNO (operands[0]) == REGNO (operands[1]));
+
+ return avr_out_plus (operands, NULL, NULL);
+ }
+ [(set_attr "length" "4")
+ (set_attr "adjust_len" "out_plus")
+ (set_attr "cc" "out_plus")])
+
(define_insn "addsi3"
[(set (match_operand:SI 0 "register_operand" "=r,d ,d,r")
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h (revision 180102)
+++ config/avr/avr-protos.h (working copy)
@@ -83,6 +83,7 @@ extern void avr_output_addr_vec_elt (FIL
extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
extern const char* avr_out_bitop (rtx, rtx*, int*);
extern const char* avr_out_plus (rtx*, int*, int*);
+extern const char* avr_out_plus_noclobber (rtx*, int*, int*);
extern const char* avr_out_addto_sp (rtx*, int*);
extern bool avr_popcount_each_byte (rtx, int, int);
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c (revision 180104)
+++ config/avr/avr.c (working copy)
@@ -1051,9 +1051,10 @@ expand_epilogue (bool sibcall_p)
if (frame_pointer_needed)
{
/* Get rid of frame. */
- emit_move_insn(frame_pointer_rtx,
- gen_rtx_PLUS (HImode, frame_pointer_rtx,
- gen_int_mode (size, HImode)));
+ if (size)
+ emit_move_insn (frame_pointer_rtx,
+ gen_rtx_PLUS (HImode, frame_pointer_rtx,
+ gen_int_mode (size, HImode)));
}
else
{
@@ -1682,14 +1683,19 @@ notice_update_cc (rtx body ATTRIBUTE_UNU
break;
case CC_OUT_PLUS:
+ case CC_OUT_PLUS_NOCLOBBER:
{
rtx *op = recog_data.operand;
int len_dummy, icc;
/* Extract insn's operands. */
extract_constrain_insn_cached (insn);
+
+ if (CC_OUT_PLUS == cc)
+ avr_out_plus (op, &len_dummy, &icc);
+ else
+ avr_out_plus_noclobber (op, &len_dummy, &icc);
- avr_out_plus (op, &len_dummy, &icc);
cc = (enum attr_cc) icc;
break;
@@ -4773,7 +4779,8 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
/* Value to add. There are two ways to add VAL: R += VAL and R -= -VAL. */
rtx xval = xop[2];
- /* Addition does not set cc0 in a usable way. */
+ /* Except in the case of ADIW with 16-bit register (see below)
+ addition does not set cc0 in a usable way. */
*pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER;
@@ -4821,6 +4828,9 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
started = true;
avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1",
op, plen, 1);
+
+ if (n_bytes == 2 && PLUS == code)
+ *pcc = CC_SET_ZN;
}
i++;
@@ -4836,6 +4846,14 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
op, plen, 1);
continue;
}
+ else if ((val8 == 1 || val8 == 0xff)
+ && !started
+ && i == n_bytes - 1)
+ {
+ avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0",
+ op, plen, 1);
+ break;
+ }
switch (code)
{
@@ -4924,6 +4942,22 @@ avr_out_plus (rtx *xop, int *plen, int *
}
+/* Like avr_out_plus, but XOP has only 3 entries: copy them to a local
+   4-element array with NULL_RTX as dummy 4th operand and pass that on.  */
+
+const char*
+avr_out_plus_noclobber (rtx *xop, int *plen, int *pcc)
+{
+  rtx op[4];
+
+  op[0] = xop[0];
+  op[1] = xop[1];
+  op[2] = xop[2];
+  op[3] = NULL_RTX;  /* *addhi3 has no 4th (scratch) operand.  */
+
+  return avr_out_plus (op, plen, pcc);
+}
+
/* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile
time constant XOP[2]:
@@ -5308,6 +5342,8 @@ adjust_insn_length (rtx insn, int len)
case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break;
case ADJUST_LEN_OUT_PLUS: avr_out_plus (op, &len, NULL); break;
+ case ADJUST_LEN_OUT_PLUS_NOCLOBBER:
+ avr_out_plus_noclobber (op, &len, NULL); break;
case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break;