[patch,avr] Fix PR57516 fixed-point rounding in the overflow case

Georg-Johann Lay Thu, 18 Jul 2013 03:42:05 -0700

Currently, the fixed-point rounding does not work correctly in the overflow
case.  This is because of misreading section 2.1.7.2 of TR 18037.


Rounding builtins expand to saturated addition and AND so that the instruction
sequence is

add value1
if not overflow goto 0
load max value
0:
and value2

where the correct sequence reads

add value1
if not overflow goto 0
load max value
goto 1
0:
and value2
1:

This change is performed by the patch.  The round expander is transformed to an
 insn that uses avr_out_plus and avr_out_bitop to print most of the 
instructions.

Okay to apply?

Johann

gcc/
        PR target/57516
        * config/avr/avr-fixed.md (round<mode>3_const): Turn expander to insn.
        * config/avr/avr.md (adjust_len): Add `round'.
        * config/avr/avr-protos.h (avr_out_round): New prototype.
        (avr_out_plus): Add `out_label' argument.
        * config/avr/avr.c (avr_out_plus_1): Add `out_label' argument.
        (avr_out_plus): Pass down `out_label' to avr_out_plus_1.
        Handle the case where `insn' is just a pattern.
        (avr_out_bitop): Handle the case where `insn' is just a pattern.
        (avr_out_round): New function.
        (avr_adjust_insn_length): Handle ADJUST_LEN_ROUND.

libgcc/
        PR target/57516
        * config/avr/lib1funcs-fixed.S (__roundqq3, __rounduqq3)
        (__round_s2_const, __round_u2_const)
        (__round_s4_const, __round_u4_const, __round_x8):
        Saturate result if addition result cannot be represented.

gcc/testsuite/
        PR target/57516
        * gcc.target/avr/torture/builtins-4-roundfx.c (test2hr, test2k):
        Adjust to corrected rounding.

Index: gcc/config/avr/avr-fixed.md
===================================================================
--- gcc/config/avr/avr-fixed.md	(revision 200903)
+++ gcc/config/avr/avr-fixed.md	(working copy)
@@ -447,49 +447,18 @@ (define_expand "round<mode>3"
 ;; "roundqq3_const"  "rounduqq3_const"
 ;; "roundhq3_const"  "rounduhq3_const"  "roundha3_const"  "rounduha3_const"
 ;; "roundsq3_const"  "roundusq3_const"  "roundsa3_const"  "roundusa3_const"
-(define_expand "round<mode>3_const"
-  [(parallel [(match_operand:ALL124QA 0 "register_operand" "")
-              (match_operand:ALL124QA 1 "register_operand" "")
-              (match_operand:HI 2 "const_int_operand" "")])]
+(define_insn "round<mode>3_const"
+  [(set (match_operand:ALL124QA 0 "register_operand"                  "=d")
+        (unspec:ALL124QA [(match_operand:ALL124QA 1 "register_operand" "0")
+                          (match_operand:HI 2 "const_int_operand"      "n")
+                          (const_int 0)]
+                         UNSPEC_ROUND))]
   ""
   {
-    // The rounding point RP is $2.  The smallest fractional
-    // bit that is not cleared by the rounding is 2^(-RP).
-
-    enum machine_mode imode = int_mode_for_mode (<MODE>mode);
-    int fbit = (int) GET_MODE_FBIT (<MODE>mode);
-
-    // Add-Saturate  1/2 * 2^(-RP)
-
-    double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (operands[2]));
-    rtx x_add = const_fixed_from_double_int (i_add, <MODE>mode);
-
-    if (SIGNED_FIXED_POINT_MODE_P (<MODE>mode))
-      emit_move_insn (operands[0],
-                      gen_rtx_SS_PLUS (<MODE>mode, operands[1], x_add));
-    else
-      emit_move_insn (operands[0],
-                      gen_rtx_US_PLUS (<MODE>mode, operands[1], x_add));
-
-    // Keep  all bits from RP and higher:   ... 2^(-RP)
-    // Clear all bits from RP+1 and lower:              2^(-RP-1) ...
-    // Rounding point                           ^^^^^^^
-    // Added above                                      ^^^^^^^^^
-
-    rtx xreg = simplify_gen_subreg (imode, operands[0], <MODE>mode, 0);
-    rtx xmask = immed_double_int_const (-i_add - i_add, imode);
-
-    if (SImode == imode)
-      emit_insn (gen_andsi3 (xreg, xreg, xmask));
-    else if (HImode == imode)
-      emit_insn (gen_andhi3 (xreg, xreg, xmask));
-    else if (QImode == imode)
-      emit_insn (gen_andqi3 (xreg, xreg, xmask));
-    else
-      gcc_unreachable();
-
-    DONE;
-  })
+    return avr_out_round (insn, operands);
+  }
+  [(set_attr "cc" "clobber")
+   (set_attr "adjust_len" "round")])
 
 
 ;; "*roundqq3.libgcc"  "*rounduqq3.libgcc"
Index: gcc/config/avr/avr.md
===================================================================
--- gcc/config/avr/avr.md	(revision 200903)
+++ gcc/config/avr/avr.md	(working copy)
@@ -140,7 +140,7 @@ (define_attr "adjust_len"
   "out_bitop, plus, addto_sp,
    tsthi, tstpsi, tstsi, compare, compare64, call,
    mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32,
-   ufract, sfract,
+   ufract, sfract, round,
    xload, lpm, movmem,
    ashlqi, ashrqi, lshrqi,
    ashlhi, ashrhi, lshrhi,
Index: gcc/config/avr/avr-protos.h
===================================================================
--- gcc/config/avr/avr-protos.h	(revision 200903)
+++ gcc/config/avr/avr-protos.h	(working copy)
@@ -86,7 +86,8 @@ extern int avr_starting_frame_offset (vo
 extern void avr_output_addr_vec_elt (FILE *stream, int value);
 extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
 extern const char* avr_out_bitop (rtx, rtx*, int*);
-extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL);
+extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true);
+extern const char* avr_out_round (rtx, rtx*, int* =NULL);
 extern const char* avr_out_addto_sp (rtx*, int*);
 extern const char* avr_out_xload (rtx, rtx*, int*);
 extern const char* avr_out_movmem (rtx, rtx*, int*);
Index: gcc/config/avr/avr.c
===================================================================
--- gcc/config/avr/avr.c	(revision 200903)
+++ gcc/config/avr/avr.c	(working copy)
@@ -6232,11 +6232,14 @@ lshrsi3_out (rtx insn, rtx operands[], i
    the subtrahend in the original insn, provided it is a compile time constant.
    In all other cases, SIGN is 0.
 
-   Return "".  */
+   If OUT_LABEL is true, print the final 0: label which is needed for
+   saturated addition / subtraction.  The only case where OUT_LABEL = false
+   is useful is for saturated addition / subtraction performed during
+   fixed-point rounding, cf. `avr_out_round'.  */
 
 static void
 avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
-                enum rtx_code code_sat = UNKNOWN, int sign = 0)
+                enum rtx_code code_sat, int sign, bool out_label)
 {
   /* MODE of the operation.  */
   enum machine_mode mode = GET_MODE (xop[0]);
@@ -6675,7 +6678,8 @@ avr_out_plus_1 (rtx *xop, int *plen, enu
                      "mov %r0+5,%0", xop, plen, 4);
     }
 
-  avr_asm_len ("0:", op, plen, 0);
+  if (out_label)
+    avr_asm_len ("0:", op, plen, 0);
 }
 
 
@@ -6713,8 +6717,8 @@ avr_out_plus_symbol (rtx *xop, enum rtx_
 
 /* Prepare operands of addition/subtraction to be used with avr_out_plus_1.
 
-   INSN is a single_set insn with a binary operation as SET_SRC that is
-   one of:  PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS.
+   INSN is a single_set insn or an insn pattern with a binary operation as
+   SET_SRC that is one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS.
 
    XOP are the operands of INSN.  In the case of 64-bit operations with
    constant XOP[] has just one element:  The summand/subtrahend in XOP[0].
@@ -6729,19 +6733,22 @@ avr_out_plus_symbol (rtx *xop, enum rtx_
 
    PLEN and PCC default to NULL.
 
+   OUT_LABEL defaults to TRUE.  For a description, see AVR_OUT_PLUS_1.
+
    Return ""  */
 
 const char*
-avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc)
+avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label)
 {
   int cc_plus, cc_minus, cc_dummy;
   int len_plus, len_minus;
   rtx op[4];
-  rtx xdest = SET_DEST (single_set (insn));
+  rtx xpattern = INSN_P (insn) ? single_set (insn) : insn;
+  rtx xdest = SET_DEST (xpattern);
   enum machine_mode mode = GET_MODE (xdest);
   enum machine_mode imode = int_mode_for_mode (mode);
   int n_bytes = GET_MODE_SIZE (mode);
-  enum rtx_code code_sat = GET_CODE (SET_SRC (single_set (insn)));
+  enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern));
   enum rtx_code code
     = (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat
        ? PLUS : MINUS);
@@ -6756,7 +6763,7 @@ avr_out_plus (rtx insn, rtx *xop, int *p
 
   if (n_bytes <= 4 && REG_P (xop[2]))
     {
-      avr_out_plus_1 (xop, plen, code, pcc, code_sat);
+      avr_out_plus_1 (xop, plen, code, pcc, code_sat, 0, out_label);
       return "";
     }
 
@@ -6783,7 +6790,8 @@ avr_out_plus (rtx insn, rtx *xop, int *p
   /* Saturations and 64-bit operations don't have a clobber operand.
      For the other cases, the caller will provide a proper XOP[3].  */
 
-  op[3] = PARALLEL == GET_CODE (PATTERN (insn)) ? xop[3] : NULL_RTX;
+  xpattern = INSN_P (insn) ? PATTERN (insn) : insn;
+  op[3] = PARALLEL == GET_CODE (xpattern) ? xop[3] : NULL_RTX;
 
   /* Saturation will need the sign of the original operand.  */
 
@@ -6798,8 +6806,8 @@ avr_out_plus (rtx insn, rtx *xop, int *p
 
   /* Work out the shortest sequence.  */
 
-  avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign);
-  avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign);
+  avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign, out_label);
+  avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign, out_label);
 
   if (plen)
     {
@@ -6807,9 +6815,9 @@ avr_out_plus (rtx insn, rtx *xop, int *p
       *pcc  = (len_minus <= len_plus) ? cc_minus : cc_plus;
     }
   else if (len_minus <= len_plus)
-    avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign);
+    avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign, out_label);
   else
-    avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign);
+    avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign, out_label);
 
   return "";
 }
@@ -6823,13 +6831,15 @@ avr_out_plus (rtx insn, rtx *xop, int *p
    and return "".  If PLEN == NULL, print assembler instructions to perform the
    operation; otherwise, set *PLEN to the length of the instruction sequence
    (in words) printed with PLEN == NULL.  XOP[3] is either an 8-bit clobber
-   register or SCRATCH if no clobber register is needed for the operation.  */
+   register or SCRATCH if no clobber register is needed for the operation.
+   INSN is an INSN_P or a pattern of an insn.  */
 
 const char*
 avr_out_bitop (rtx insn, rtx *xop, int *plen)
 {
   /* CODE and MODE of the operation.  */
-  enum rtx_code code = GET_CODE (SET_SRC (single_set (insn)));
+  rtx xpattern = INSN_P (insn) ? single_set (insn) : insn;
+  enum rtx_code code = GET_CODE (SET_SRC (xpattern));
   enum machine_mode mode = GET_MODE (xop[0]);
 
   /* Number of bytes to operate on.  */
@@ -7332,6 +7342,67 @@ avr_out_fract (rtx insn, rtx operands[],
 }
 
 
+/* Output fixed-point rounding.  XOP[0] = XOP[1] is the operand to round.
+   XOP[2] is the rounding point, a CONST_INT.  The function prints the
+   instruction sequence if PLEN = NULL and computes the length in words
+   of the sequence if PLEN != NULL.  Most of this function deals with
+   preparing operands for calls to `avr_out_plus' and `avr_out_bitop'.  */
+
+const char*
+avr_out_round (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen)
+{
+  enum machine_mode mode = GET_MODE (xop[0]);
+  enum machine_mode imode = int_mode_for_mode (mode);
+  // The smallest fractional bit not cleared by the rounding is 2^(-RP).
+  int fbit = (int) GET_MODE_FBIT (mode);
+  double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (xop[2]));
+  // Lengths of PLUS and AND parts.
+  int len_add = 0, *plen_add = plen ? &len_add : NULL;
+  int len_and = 0, *plen_and = plen ? &len_and : NULL;
+
+  // Add-Saturate  1/2 * 2^(-RP).  Don't print the label "0:" when printing
+  // the saturated addition so that we can emit the "rjmp 1f" before the
+  // "0:" below.
+
+  rtx xadd = const_fixed_from_double_int (i_add, mode);
+  rtx xpattern, xsrc, op[4];
+
+  xsrc = SIGNED_FIXED_POINT_MODE_P (mode)
+    ? gen_rtx_SS_PLUS (mode, xop[1], xadd)
+    : gen_rtx_US_PLUS (mode, xop[1], xadd);
+  xpattern = gen_rtx_SET (VOIDmode, xop[0], xsrc);
+
+  op[0] = xop[0];
+  op[1] = xop[1];
+  op[2] = xadd;
+  avr_out_plus (xpattern, op, plen_add, NULL, false /* Don't print "0:" */);
+
+  avr_asm_len ("rjmp 1f" CR_TAB
+               "0:", NULL, plen_add, 1);
+
+  // Keep  all bits from RP and higher:   ... 2^(-RP)
+  // Clear all bits from RP+1 and lower:              2^(-RP-1) ...
+  // Rounding point                           ^^^^^^^
+  // Added above                                      ^^^^^^^^^
+  rtx xreg = simplify_gen_subreg (imode, xop[0], mode, 0);
+  rtx xmask = immed_double_int_const (-i_add - i_add, imode);
+
+  xpattern = gen_rtx_SET (VOIDmode, xreg, gen_rtx_AND (imode, xreg, xmask));
+
+  op[0] = xreg;
+  op[1] = xreg;
+  op[2] = xmask;
+  op[3] = gen_rtx_SCRATCH (QImode);
+  avr_out_bitop (xpattern, op, plen_and);
+  avr_asm_len ("1:", NULL, plen, 0);
+
+  if (plen)
+    *plen = len_add + len_and;
+
+  return "";
+}
+
+
 /* Create RTL split patterns for byte sized rotate expressions.  This
   produces a series of move instructions and considers overlap situations.
   Overlapping non-HImode operands need a scratch register.  */
@@ -7540,6 +7611,7 @@ avr_adjust_insn_length (rtx insn, int le
 
     case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break;
     case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break;
+    case ADJUST_LEN_ROUND: avr_out_round (insn, op, &len); break;
 
     case ADJUST_LEN_TSTHI: avr_out_tsthi (insn, op, &len); break;
     case ADJUST_LEN_TSTPSI: avr_out_tstpsi (insn, op, &len); break;
Index: libgcc/config/avr/lib1funcs-fixed.S
===================================================================
--- libgcc/config/avr/lib1funcs-fixed.S	(revision 200903)
+++ libgcc/config/avr/lib1funcs-fixed.S	(working copy)
@@ -1464,12 +1464,13 @@ DEFUN  __roundqq3
     ;; Add-Saturate 2^{-RP-1}
     add     A0, C0
     brvc 0f
-    ldi     A0, 0x7f
+    ldi     C0, 0x7f
+    rjmp 9f
 0:  ;; Mask out bits beyond RP
     lsl     C0
     neg     C0
     and     C0, A0
-    mov     C1, __tmp_reg__
+9:  mov     C1, __tmp_reg__
     ret
 ENDF  __roundqq3
 #endif /* L_roundqq3 */
@@ -1488,12 +1489,13 @@ DEFUN  __rounduqq3
     ;; Add-Saturate 2^{-RP-1}
     add     A0, C0
     brcc 0f
-    ldi     A0, 0xff
+    ldi     C0, 0xff
+    rjmp 9f
 0:  ;; Mask out bits beyond RP
     lsl     C0
     neg     C0
     and     C0, A0
-    mov     C1, __tmp_reg__
+9:  mov     C1, __tmp_reg__
     ret
 ENDF  __rounduqq3
 #endif /* L_rounduqq3 */
@@ -1565,16 +1567,17 @@ ENDF  __rounduha3
 
 DEFUN  __round_s2_const
     brvc 2f
-    ldi     A1, 0x7f
+    ldi     C1, 0x7f
     rjmp 1f
     ;; FALLTHRU (Barrier)
 ENDF  __round_s2_const
 
 DEFUN __round_u2_const
     brcc 2f
-    ldi     A1, 0xff
+    ldi     C1, 0xff
 1:
-    ldi     A0, 0xff
+    ldi     C0, 0xff
+    rjmp 9f
 2:
     ;; Saturation is performed now.
     ;; Currently, we have C[] = 2^{-RP-1}
@@ -1586,7 +1589,7 @@ DEFUN __round_u2_const
     ;; Clear the bits beyond the rounding point.
     and     C0, A0
     and     C1, A1
-    ret
+9:  ret
 ENDF  __round_u2_const
 
 #endif /* L_round_2_const */
@@ -1681,18 +1684,19 @@ ENDF  __roundusa3
 
 DEFUN  __round_s4_const
     brvc 2f
-    ldi     A3, 0x7f
+    ldi     C3, 0x7f
     rjmp 1f
     ;; FALLTHRU (Barrier)
 ENDF  __round_s4_const
 
 DEFUN __round_u4_const
     brcc 2f
-    ldi     A3, 0xff
+    ldi     C3, 0xff
 1:
-    ldi     A2, 0xff
-    ldi     A1, 0xff
-    ldi     A0, 0xff
+    ldi     C2, 0xff
+    ldi     C1, 0xff
+    ldi     C0, 0xff
+    rjmp 9f
 2:
     ;; Saturation is performed now.
     ;; Currently, we have C[] = 2^{-RP-1}
@@ -1707,7 +1711,7 @@ DEFUN __round_u4_const
     and     C1, A1
     and     C2, A2
     and     C3, A3
-    ret
+9:  ret
 ENDF  __round_u4_const
 
 #endif /* L_round_4_const */
@@ -1847,12 +1851,13 @@ DEFUN __round_x8
 1:  ;; Unsigned
     brcc    3f
     ;; Unsigned overflow: A[] = 0xff...
-2:  ldi     A7, 0xff
-    ldi     A6, 0xff
-    wmov    A0, A6
-    wmov    A2, A6
-    wmov    A4, A6
-    bld     A7, 7
+2:  ldi     C7, 0xff
+    ldi     C6, 0xff
+    wmov    C0, C6
+    wmov    C2, C6
+    wmov    C4, C6
+    bld     C7, 7
+    rjmp 9f
 3:
     ;;  C[] = -C[] - C[]
     push    A0
@@ -1869,7 +1874,7 @@ DEFUN __round_x8
     and     C5, A5
     and     C6, A6
     and     C7, A7
-    ;; Epilogue
+9:  ;; Epilogue
     pop r29
     pop r28
     pop r17
Index: gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c
===================================================================
--- gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c	(revision 200903)
+++ gcc/testsuite/gcc.target/avr/torture/builtins-4-roundfx.c	(working copy)
@@ -72,11 +72,11 @@ DEFTEST1 (long long accum, llk)
 
 static void test2hr (void)
 {
-  TEST2 (hr, 1, 0x7f, 0x40);
-  TEST2 (hr, 2, 0x7f, 0b1100000);
-  TEST2 (hr, 3, 0x7f, 0b1110000);
-  TEST2 (hr, 4, 0x7f, 0b1111000);
-
+  TEST2 (hr, 1, 0x7f, 0x7f);
+  TEST2 (hr, 2, 0x70, 0x7f);
+  TEST2 (hr, 3, 0x78, 0x7f);
+  TEST2 (hr, 4, 0x7f, 0x7f);
+ 
   TEST2 (uhr, 1, 0x7f, 0x80);
   TEST2 (uhr, 2, 0x7f, 0x80);
   TEST2 (uhr, 3, 0x7f, 0x80);
@@ -85,10 +85,13 @@ static void test2hr (void)
 
 void test2k (void)
 {
-  TEST2 (k, 1, 0x7fffffff, 0x7fff8000 | 0b100000000000000);
-  TEST2 (k, 2, 0x7fffffff, 0x7fff8000 | 0b110000000000000);
-  TEST2 (k, 3, 0x7fffffff, 0x7fff8000 | 0b111000000000000);
-  TEST2 (k, 4, 0x7fffffff, 0x7fff8000 | 0b111100000000000);
+  TEST2 (k, 1, 0x7fffff00, 0x7fffffff);
+  TEST2 (k, 2, 0x7ffffff0, 0x7fffffff);
+  TEST2 (k, 2, 0x7ffff000, 0x7fffffff);
+  TEST2 (k, 3, 0x7ffff000, 0x7ffff000);
+  TEST2 (k, 3, 0x7ffff800, 0x7fffffff);
+  TEST2 (k, 3, 0x7ffff7ff, 0x7ffff000);
+  TEST2 (k, 4, 0x7ffff7ff, 0x7ffff800);
 
   TEST2 (uk, 1, 0x7fffffff, 1ul << 31);
   TEST2 (uk, 2, 0x7fffffff, 1ul << 31);

[patch,avr] Fix PR57516 fixed-point rounding in the overflow case

Reply via email to