Re: [4.8, PATCH 16/26] Backport Power8 and LE support: PR56843

2014-04-03 Thread David Edelsohn
On Wed, Mar 19, 2014 at 3:33 PM, Bill Schmidt
wschm...@linux.vnet.ibm.com wrote:
 Hi,

 This patch (diff-pr56843) backports the fix for PR56843.

 Thanks,
 Bill


 [gcc]

 2014-03-19  Bill Schmidt  wschm...@linux.vnet.ibm.com

 Backport from mainline
 2013-04-05  Bill Schmidt  wschm...@linux.vnet.ibm.com

 PR target/56843
 * config/rs6000/rs6000.c (rs6000_emit_swdiv_high_precision): Remove.
 (rs6000_emit_swdiv_low_precision): Remove.
 (rs6000_emit_swdiv): Rewrite to handle between one and four
 iterations of Newton-Raphson generally; modify required number of
 iterations for some cases.
 * config/rs6000/rs6000.h (RS6000_RECIP_HIGH_PRECISION_P): Remove.

 [gcc/testsuite]

 2014-03-19  Bill Schmidt  wschm...@linux.vnet.ibm.com

 Backport from mainline
 2013-04-05  Bill Schmidt  wschm...@linux.vnet.ibm.com

 PR target/56843
 * gcc.target/powerpc/recip-1.c: Modify expected output.
 * gcc.target/powerpc/recip-3.c: Likewise.
 * gcc.target/powerpc/recip-4.c: Likewise.
 * gcc.target/powerpc/recip-5.c: Add expected output for iterations.

Okay.

Thanks, David


[4.8, PATCH 16/26] Backport Power8 and LE support: PR56843

2014-03-19 Thread Bill Schmidt
Hi,

This patch (diff-pr56843) backports the fix for PR56843.

Thanks,
Bill


[gcc]

2014-03-19  Bill Schmidt  wschm...@linux.vnet.ibm.com

Backport from mainline
2013-04-05  Bill Schmidt  wschm...@linux.vnet.ibm.com

PR target/56843
* config/rs6000/rs6000.c (rs6000_emit_swdiv_high_precision): Remove.
(rs6000_emit_swdiv_low_precision): Remove.
(rs6000_emit_swdiv): Rewrite to handle between one and four
iterations of Newton-Raphson generally; modify required number of
iterations for some cases.
* config/rs6000/rs6000.h (RS6000_RECIP_HIGH_PRECISION_P): Remove.

[gcc/testsuite]

2014-03-19  Bill Schmidt  wschm...@linux.vnet.ibm.com

Backport from mainline
2013-04-05  Bill Schmidt  wschm...@linux.vnet.ibm.com

PR target/56843
* gcc.target/powerpc/recip-1.c: Modify expected output.
* gcc.target/powerpc/recip-3.c: Likewise.
* gcc.target/powerpc/recip-4.c: Likewise.
* gcc.target/powerpc/recip-5.c: Add expected output for iterations.


Index: gcc-4_8-test/gcc/config/rs6000/rs6000.c
===================================================================
--- gcc-4_8-test.orig/gcc/config/rs6000/rs6000.c
+++ gcc-4_8-test/gcc/config/rs6000/rs6000.c
@@ -29417,54 +29417,26 @@ rs6000_emit_nmsub (rtx dst, rtx m1, rtx
   emit_insn (gen_rtx_SET (VOIDmode, dst, r));
 }
 
-/* Newton-Raphson approximation of floating point divide with just 2 passes
-   (either single precision floating point, or newer machines with higher
-   accuracy estimates).  Support both scalar and vector divide.  Assumes no
-   trapping math and finite arguments.  */
+/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
+   add a reg_note saying that this was a division.  Support both scalar and
+   vector divide.  Assumes no trapping math and finite arguments.  */
 
-static void
-rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
+void
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
 {
   enum machine_mode mode = GET_MODE (dst);
-  rtx x0, e0, e1, y1, u0, v0;
-  enum insn_code code = optab_handler (smul_optab, mode);
-  insn_gen_fn gen_mul = GEN_FCN (code);
-  rtx one = rs6000_load_constant_and_splat (mode, dconst1);
-
-  gcc_assert (code != CODE_FOR_nothing);
-
-  /* x0 = 1./d estimate */
-  x0 = gen_reg_rtx (mode);
-  emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
- UNSPEC_FRES)));
-
-  e0 = gen_reg_rtx (mode);
-  rs6000_emit_nmsub (e0, d, x0, one);  /* e0 = 1. - (d * x0) */
-
-  e1 = gen_reg_rtx (mode);
-  rs6000_emit_madd (e1, e0, e0, e0);   /* e1 = (e0 * e0) + e0 */
-
-  y1 = gen_reg_rtx (mode);
-  rs6000_emit_madd (y1, e1, x0, x0);   /* y1 = (e1 * x0) + x0 */
-
-  u0 = gen_reg_rtx (mode);
-  emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
-
-  v0 = gen_reg_rtx (mode);
-  rs6000_emit_nmsub (v0, d, u0, n);/* v0 = n - (d * u0) */
-
-  rs6000_emit_madd (dst, v0, y1, u0);  /* dst = (v0 * y1) + u0 */
-}
+  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
+  int i;
 
-/* Newton-Raphson approximation of floating point divide that has a low
-   precision estimate.  Assumes no trapping math and finite arguments.  */
+  /* Low precision estimates guarantee 5 bits of accuracy.  High
+     precision estimates guarantee 14 bits of accuracy.  SFmode
+     requires 23 bits of accuracy.  DFmode requires 52 bits of
+     accuracy.  Each pass at least doubles the accuracy, leading
+     to the following.  */
+  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
+  if (mode == DFmode || mode == V2DFmode)
+passes++;
 
-static void
-rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
-{
-  enum machine_mode mode = GET_MODE (dst);
-  rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
   enum insn_code code = optab_handler (smul_optab, mode);
   insn_gen_fn gen_mul = GEN_FCN (code);
 
@@ -29478,46 +29450,44 @@ rs6000_emit_swdiv_low_precision (rtx dst
  gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
  UNSPEC_FRES)));
 
-  e0 = gen_reg_rtx (mode);
-  rs6000_emit_nmsub (e0, d, x0, one);  /* e0 = 1. - d * x0 */
-
-  y1 = gen_reg_rtx (mode);
-  rs6000_emit_madd (y1, e0, x0, x0);   /* y1 = x0 + e0 * x0 */
-
-  e1 = gen_reg_rtx (mode);
-  emit_insn (gen_mul (e1, e0, e0));/* e1 = e0 * e0 */
-
-  y2 = gen_reg_rtx (mode);
-  rs6000_emit_madd (y2, e1, y1, y1);   /* y2 = y1 + e1 * y1 */
-
-  e2 = gen_reg_rtx (mode);
-  emit_insn (gen_mul (e2, e1, e1));/* e2 = e1 * e1 */
-
-  y3 = gen_reg_rtx (mode);
-  rs6000_emit_madd (y3, e2, y2, y2);   /* y3 = y2 + e2 * y2 */
-
-  u0 = gen_reg_rtx (mode);
-  emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
-
-  v0 = gen_reg_rtx (mode);
-  rs6000_emit_nmsub (v0, d, u0, n);/* v0 = n - d * u0