Currently, the fixed-point rounding does not work correctly in the overflow case. This is because of misreading section 2.1.7.2 of TR 18037.
Rounding builtins expand to saturated addition and AND so that the instruction sequence is add value1 if not overflow goto 0 load max value 0: and value2 where the correct sequence reads add value1 if not overflow goto 0 load max value goto 1 0: and value2 1: This change is performed by the patch. The round expander is transformed to an insn that uses avr_out_plus and avr_out_bitop to print most of the instructions. Okay to apply? Johann gcc/ PR target/57516 * config/avr/avr-fixed.md (round<mode>3_const): Turn expander to insn. * config/avr/avr.md (adjust_len): Add `round'. * config/avr/avr-protos.h (avr_out_round): New prototype. (avr_out_plus): Add `out_label' argument. * config/avr/avr.c (avr_out_plus_1): Add `out_label' argument. (avr_out_plus): Pass down `out_label' to avr_out_plus_1. Handle the case where `insn' is just a pattern. (avr_out_bitop): Handle the case where `insn' is just a pattern. (avr_out_round): New function. (avr_adjust_insn_length): Handle ADJUST_LEN_ROUND. libgcc/ PR target/57516 * config/avr/lib1funcs-fixed.S (__roundqq3, __rounduqq3) (__round_s2_const, __round_u2_const) (__round_s4_const, __round_u4_const, __round_x8): Saturate result if addition result cannot be represented. gcc/testsuite/ PR target/57516 * gcc.target/avr/torture/builtins-4-roundfx.c (test2hr, test2k): Adjust to corrected rounding.
Index: gcc/config/avr/avr-fixed.md =================================================================== --- gcc/config/avr/avr-fixed.md (revision 200903) +++ gcc/config/avr/avr-fixed.md (working copy) @@ -447,49 +447,18 @@ (define_expand "round<mode>3" ;; "roundqq3_const" "rounduqq3_const" ;; "roundhq3_const" "rounduhq3_const" "roundha3_const" "rounduha3_const" ;; "roundsq3_const" "roundusq3_const" "roundsa3_const" "roundusa3_const" -(define_expand "round<mode>3_const" - [(parallel [(match_operand:ALL124QA 0 "register_operand" "") - (match_operand:ALL124QA 1 "register_operand" "") - (match_operand:HI 2 "const_int_operand" "")])] +(define_insn "round<mode>3_const" + [(set (match_operand:ALL124QA 0 "register_operand" "=d") + (unspec:ALL124QA [(match_operand:ALL124QA 1 "register_operand" "0") + (match_operand:HI 2 "const_int_operand" "n") + (const_int 0)] + UNSPEC_ROUND))] "" { - // The rounding point RP is $2. The smallest fractional - // bit that is not cleared by the rounding is 2^(-RP). - - enum machine_mode imode = int_mode_for_mode (<MODE>mode); - int fbit = (int) GET_MODE_FBIT (<MODE>mode); - - // Add-Saturate 1/2 * 2^(-RP) - - double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (operands[2])); - rtx x_add = const_fixed_from_double_int (i_add, <MODE>mode); - - if (SIGNED_FIXED_POINT_MODE_P (<MODE>mode)) - emit_move_insn (operands[0], - gen_rtx_SS_PLUS (<MODE>mode, operands[1], x_add)); - else - emit_move_insn (operands[0], - gen_rtx_US_PLUS (<MODE>mode, operands[1], x_add)); - - // Keep all bits from RP and higher: ... 2^(-RP) - // Clear all bits from RP+1 and lower: 2^(-RP-1) ... - // Rounding point ^^^^^^^ - // Added above ^^^^^^^^^ - - rtx xreg = simplify_gen_subreg (imode, operands[0], <MODE>mode, 0); - rtx xmask = immed_double_int_const (-i_add - i_add, imode); - - if (SImode == imode) - emit_insn (gen_andsi3 (xreg, xreg, xmask)); - else if (HImode == imode) - emit_insn (gen_andhi3 (xreg, xreg, xmask)); - else if (QImode == imode) - emit_insn (gen_andqi3 (xreg, xreg, xmask)); - else - gcc_unreachable(); - - DONE; - }) + return avr_out_round (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "round")]) ;; "*roundqq3.libgcc" "*rounduqq3.libgcc" Index: gcc/config/avr/avr.md =================================================================== --- gcc/config/avr/avr.md (revision 200903) +++ gcc/config/avr/avr.md (working copy) @@ -140,7 +140,7 @@ (define_attr "adjust_len" "out_bitop, plus, addto_sp, tsthi, tstpsi, tstsi, compare, compare64, call, mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, - ufract, sfract, + ufract, sfract, round, xload, lpm, movmem, ashlqi, ashrqi, lshrqi, ashlhi, ashrhi, lshrhi, Index: gcc/config/avr/avr-protos.h =================================================================== --- gcc/config/avr/avr-protos.h (revision 200903) +++ gcc/config/avr/avr-protos.h (working copy) @@ -86,7 +86,8 @@ extern int avr_starting_frame_offset (vo extern void avr_output_addr_vec_elt (FILE *stream, int value); extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]); extern const char* avr_out_bitop (rtx, rtx*, int*); -extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL); +extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); +extern const char* avr_out_round (rtx, rtx*, int* =NULL); extern const char* avr_out_addto_sp (rtx*, int*); extern const char* avr_out_xload (rtx, rtx*, int*); extern const char* avr_out_movmem (rtx, rtx*, int*); Index: gcc/config/avr/avr.c =================================================================== --- gcc/config/avr/avr.c (revision 200903) +++ gcc/config/avr/avr.c (working copy) @@ -6232,11 +6232,14 @@ lshrsi3_out (rtx insn, rtx operands[], i the subtrahend in the original insn, provided it is a compile time constant. In all other cases, SIGN is 0. - Return "". */ + If OUT_LABEL is true, print the final 0: label which is needed for + saturated addition / subtraction. The only case where OUT_LABEL = false + is useful is for saturated addition / subtraction performed during + fixed-point rounding, cf. `avr_out_round'. */ static void avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, - enum rtx_code code_sat = UNKNOWN, int sign = 0) + enum rtx_code code_sat, int sign, bool out_label) { /* MODE of the operation. */ enum machine_mode mode = GET_MODE (xop[0]); @@ -6675,7 +6678,8 @@ avr_out_plus_1 (rtx *xop, int *plen, enu "mov %r0+5,%0", xop, plen, 4); } - avr_asm_len ("0:", op, plen, 0); + if (out_label) + avr_asm_len ("0:", op, plen, 0); } @@ -6713,8 +6717,8 @@ avr_out_plus_symbol (rtx *xop, enum rtx_ /* Prepare operands of addition/subtraction to be used with avr_out_plus_1. - INSN is a single_set insn with a binary operation as SET_SRC that is - one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS. + INSN is a single_set insn or an insn pattern with a binary operation as + SET_SRC that is one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS. XOP are the operands of INSN. In the case of 64-bit operations with constant XOP[] has just one element: The summand/subtrahend in XOP[0]. @@ -6729,19 +6733,22 @@ avr_out_plus_symbol (rtx *xop, enum rtx_ PLEN and PCC default to NULL. + OUT_LABEL defaults to TRUE. For a description, see AVR_OUT_PLUS_1. + Return "" */ const char* -avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc) +avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label) { int cc_plus, cc_minus, cc_dummy; int len_plus, len_minus; rtx op[4]; - rtx xdest = SET_DEST (single_set (insn)); + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + rtx xdest = SET_DEST (xpattern); enum machine_mode mode = GET_MODE (xdest); enum machine_mode imode = int_mode_for_mode (mode); int n_bytes = GET_MODE_SIZE (mode); - enum rtx_code code_sat = GET_CODE (SET_SRC (single_set (insn))); + enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern)); enum rtx_code code = (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat ? PLUS : MINUS); @@ -6756,7 +6763,7 @@ avr_out_plus (rtx insn, rtx *xop, int *p if (n_bytes <= 4 && REG_P (xop[2])) { - avr_out_plus_1 (xop, plen, code, pcc, code_sat); + avr_out_plus_1 (xop, plen, code, pcc, code_sat, 0, out_label); return ""; } @@ -6783,7 +6790,8 @@ avr_out_plus (rtx insn, rtx *xop, int *p /* Saturations and 64-bit operations don't have a clobber operand. For the other cases, the caller will provide a proper XOP[3]. */ - op[3] = PARALLEL == GET_CODE (PATTERN (insn)) ? xop[3] : NULL_RTX; + xpattern = INSN_P (insn) ? PATTERN (insn) : insn; + op[3] = PARALLEL == GET_CODE (xpattern) ? xop[3] : NULL_RTX; /* Saturation will need the sign of the original operand. */ @@ -6798,8 +6806,8 @@ avr_out_plus (rtx insn, rtx *xop, int *p /* Work out the shortest sequence. */ - avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign); - avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign); + avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign, out_label); + avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign, out_label); if (plen) { @@ -6807,9 +6815,9 @@ avr_out_plus (rtx insn, rtx *xop, int *p *pcc = (len_minus <= len_plus) ? cc_minus : cc_plus; } else if (len_minus <= len_plus) - avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign); + avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign, out_label); else - avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign); + avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign, out_label); return ""; } @@ -6823,13 +6831,15 @@ avr_out_plus (rtx insn, rtx *xop, int *p and return "". If PLEN == NULL, print assembler instructions to perform the operation; otherwise, set *PLEN to the length of the instruction sequence (in words) printed with PLEN == NULL. XOP[3] is either an 8-bit clobber - register or SCRATCH if no clobber register is needed for the operation. */ + register or SCRATCH if no clobber register is needed for the operation. + INSN is an INSN_P or a pattern of an insn. */ const char* avr_out_bitop (rtx insn, rtx *xop, int *plen) { /* CODE and MODE of the operation. */ - enum rtx_code code = GET_CODE (SET_SRC (single_set (insn))); + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + enum rtx_code code = GET_CODE (SET_SRC (xpattern)); enum machine_mode mode = GET_MODE (xop[0]); /* Number of bytes to operate on. */ @@ -7332,6 +7342,67 @@ avr_out_fract (rtx insn, rtx operands[], } +/* Output fixed-point rounding. XOP[0] = XOP[1] is the operand to round. + XOP[2] is the rounding point, a CONST_INT. The function prints the + instruction sequence if PLEN = NULL and computes the length in words + of the sequence if PLEN != NULL. Most of this function deals with + preparing operands for calls to `avr_out_plus' and `avr_out_bitop'. */ + +const char* +avr_out_round (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen) +{ + enum machine_mode mode = GET_MODE (xop[0]); + enum machine_mode imode = int_mode_for_mode (mode); + // The smallest fractional bit not cleared by the rounding is 2^(-RP). + int fbit = (int) GET_MODE_FBIT (mode); + double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (xop[2])); + // Lengths of PLUS and AND parts. + int len_add = 0, *plen_add = plen ? &len_add : NULL; + int len_and = 0, *plen_and = plen ? &len_and : NULL; + + // Add-Saturate 1/2 * 2^(-RP). Don't print the label "0:" when printing + // the saturated addition so that we can emit the "rjmp 1f" before the + // "0:" below. + + rtx xadd = const_fixed_from_double_int (i_add, mode); + rtx xpattern, xsrc, op[4]; + + xsrc = SIGNED_FIXED_POINT_MODE_P (mode) + ? gen_rtx_SS_PLUS (mode, xop[1], xadd) + : gen_rtx_US_PLUS (mode, xop[1], xadd); + xpattern = gen_rtx_SET (VOIDmode, xop[0], xsrc); + + op[0] = xop[0]; + op[1] = xop[1]; + op[2] = xadd; + avr_out_plus (xpattern, op, plen_add, NULL, false /* Don't print "0:" */); + + avr_asm_len ("rjmp 1f" CR_TAB + "0:", NULL, plen_add, 1); + + // Keep all bits from RP and higher: ... 2^(-RP) + // Clear all bits from RP+1 and lower: 2^(-RP-1) ... + // Rounding point ^^^^^^^ + // Added above ^^^^^^^^^ + rtx xreg = simplify_gen_subreg (imode, xop[0], mode, 0); + rtx xmask = immed_double_int_const (-i_add - i_add, imode); + + xpattern = gen_rtx_SET (VOIDmode, xreg, gen_rtx_AND (imode, xreg, xmask)); + + op[0] = xreg; + op[1] = xreg; + op[2] = xmask; + op[3] = gen_rtx_SCRATCH (QImode); + avr_out_bitop (xpattern, op, plen_and); + avr_asm_len ("1:", NULL, plen, 0); + + if (plen) + *plen = len_add + len_and; + + return ""; +} + + /* Create RTL split patterns for byte sized rotate expressions. This produces a series of move instructions and considers overlap situations. Overlapping non-HImode operands need a scratch register. */ @@ -7540,6 +7611,7 @@ avr_adjust_insn_length (rtx insn, int le case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break; case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break; + case ADJUST_LEN_ROUND: avr_out_round (insn, op, &len); break; case ADJUST_LEN_TSTHI: avr_out_tsthi (insn, op, &len); break; case ADJUST_LEN_TSTPSI: avr_out_tstpsi (insn, op, &len); break; Index: libgcc/config/avr/lib1funcs-fixed.S =================================================================== --- libgcc/config/avr/lib1funcs-fixed.S (revision 200903) +++ libgcc/config/avr/lib1funcs-fixed.S (working copy) @@ -1464,12 +1464,13 @@ DEFUN __roundqq3 ;; Add-Saturate 2^{-RP-1} add A0, C0 brvc 0f - ldi A0, 0x7f + ldi C0, 0x7f + rjmp 9f 0: ;; Mask out bits beyond RP lsl C0 neg C0 and C0, A0 - mov C1, __tmp_reg__ +9: mov C1, __tmp_reg__ ret ENDF __roundqq3 #endif /* L_roundqq3 */ @@ -1488,12 +1489,13 @@ DEFUN __rounduqq3 ;; Add-Saturate 2^{-RP-1} add A0, C0 brcc 0f - ldi A0, 0xff + ldi C0, 0xff + rjmp 9f 0: ;; Mask out bits beyond RP lsl C0 neg C0 and C0, A0 - mov C1, __tmp_reg__ +9: mov C1, __tmp_reg__ ret ENDF __rounduqq3 #endif /* L_rounduqq3 */ @@ -1565,16 +1567,17 @@ ENDF __rounduha3 DEFUN __round_s2_const brvc 2f - ldi A1, 0x7f + ldi C1, 0x7f rjmp 1f ;; FALLTHRU (Barrier) ENDF __round_s2_const DEFUN __round_u2_const brcc 2f - ldi A1, 0xff + ldi C1, 0xff 1: - ldi A0, 0xff + ldi C0, 0xff + rjmp 9f 2: ;; Saturation is performed now. ;; Currently, we have C[] = 2^{-RP-1} @@ -1586,7 +1589,7 @@ DEFUN __round_u2_const ;; Clear the bits beyond the rounding point. and C0, A0 and C1, A1 - ret +9: ret ENDF __round_u2_const #endif /* L_round_2_const */ @@ -1681,18 +1684,19 @@ ENDF __roundusa3 DEFUN __round_s4_const brvc 2f - ldi A3, 0x7f + ldi C3, 0x7f rjmp 1f ;; FALLTHRU (Barrier) ENDF __round_s4_const DEFUN __round_u4_const brcc 2f - ldi A3, 0xff + ldi C3, 0xff 1: - ldi A2, 0xff - ldi A1, 0xff - ldi A0, 0xff + ldi C2, 0xff + ldi C1, 0xff + ldi C0, 0xff + rjmp 9f 2: ;; Saturation is performed now. ;; Currently, we have C[] = 2^{-RP-1} @@ -1707,7 +1711,7 @@ DEFUN __round_u4_const and C1, A1 and C2, A2 and C3, A3 - ret +9: ret ENDF __round_u4_const #endif /* L_round_4_const */ @@ -1847,12 +1851,13 @@ DEFUN __round_x8 1: ;; Unsigned brcc 3f ;; Unsigned overflow: A[] = 0xff... -2: ldi A7, 0xff - ldi A6, 0xff - wmov A0, A6 - wmov A2, A6 - wmov A4, A6 - bld A7, 7 +2: ldi C7, 0xff + ldi C6, 0xff + wmov C0, C6 + wmov C2, C6 + wmov C4, C6 + bld C7, 7 + rjmp 9f 3: ;; C[] = -C[] - C[] push A0 @@ -1869,7 +1874,7 @@ DEFUN __round_x8 and C5, A5 and C6, A6 and C7, A7 - ;; Epilogue +9: ;; Epilogue pop r29 pop r28 pop r17 Index: gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c =================================================================== --- gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c (revision 200903) +++ gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c (working copy) @@ -72,11 +72,11 @@ DEFTEST1 (long long accum, llk) static void test2hr (void) { - TEST2 (hr, 1, 0x7f, 0x40); - TEST2 (hr, 2, 0x7f, 0b1100000); - TEST2 (hr, 3, 0x7f, 0b1110000); - TEST2 (hr, 4, 0x7f, 0b1111000); - + TEST2 (hr, 1, 0x7f, 0x7f); + TEST2 (hr, 2, 0x70, 0x7f); + TEST2 (hr, 3, 0x78, 0x7f); + TEST2 (hr, 4, 0x7f, 0x7f); + TEST2 (uhr, 1, 0x7f, 0x80); TEST2 (uhr, 2, 0x7f, 0x80); TEST2 (uhr, 3, 0x7f, 0x80); @@ -85,10 +85,13 @@ static void test2hr (void) void test2k (void) { - TEST2 (k, 1, 0x7fffffff, 0x7fff8000 | 0b100000000000000); - TEST2 (k, 2, 0x7fffffff, 0x7fff8000 | 0b110000000000000); - TEST2 (k, 3, 0x7fffffff, 0x7fff8000 | 0b111000000000000); - TEST2 (k, 4, 0x7fffffff, 0x7fff8000 | 0b111100000000000); + TEST2 (k, 1, 0x7fffff00, 0x7fffffff); + TEST2 (k, 2, 0x7ffffff0, 0x7fffffff); + TEST2 (k, 2, 0x7ffff000, 0x7fffffff); + TEST2 (k, 3, 0x7ffff000, 0x7ffff000); + TEST2 (k, 3, 0x7ffff800, 0x7fffffff); + TEST2 (k, 3, 0x7ffff7ff, 0x7ffff000); + TEST2 (k, 4, 0x7ffff7ff, 0x7ffff800); TEST2 (uk, 1, 0x7fffffff, 1ul << 31); TEST2 (uk, 2, 0x7fffffff, 1ul << 31);