On Thu, 2011-05-26 at 11:22 +0200, Richard Guenther wrote:

<snip>


> There are several extra pre-conditions in the original code in
> builtins.c as well as commens for reasonings (yes, there seem
> to be duplicates of several of the transforms there ...).  Please
> try to preserve them.  I noticed especially
> 
>               /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
>                  machines that a builtin sqrt instruction is smaller than a
>                  call to pow with 0.25, so do this optimization even if
>                  -Os.  */
> 
> and
> 
>       /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
>          cbrt/sqrts instead of pow (x, 1./6.).  */
>       if (cbrtfn && ! op
>           && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
> 
> where you omit the non-negative check and the check for NANs.
> Note that -funsafe-math-optimizations does not mean you can
> ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
> need a -funsafe-math-optimization check either.

OK.  Good catch on the non-negative/NaN check -- that just slipped
through the cracks.  I've fixed this and beefed up the comments as you
suggest.

> 
> It would be nice if you could re-arrange this function so that before
> each individual replacement all preconditions are checked (even if
> that means duplicating some checks and code) - that way
> reviewing and later maintainance should be much easier.
> 

Done -- you're right, the result is much cleaner.  Hopefully this
version is easier to follow.

Thanks, 
Bill


2011-05-25  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        PR tree-optimization/46728
        * tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
        (powi_as_mults): Add gimple_set_location.
        (build_and_insert_call): New.
        (gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
        0.5, 0.25, 0.75, 1./3., or 1./6.
        

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c    (revision 174199)
+++ gcc/tree-ssa-math-opts.c    (working copy)
@@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
     }
 
   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
+  gimple_set_location (mult_stmt, loc);
   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
 
   return ssa_target;
@@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, 
                                           build_real (type, dconst1),
                                           result);
+  gimple_set_location (div_stmt, loc);
   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
 
   return target;
@@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
   return NULL_TREE;
 }
 
+/* Build a gimple call statement that calls FN with argument ARG.
+   Set the lhs of the call statement to a fresh SSA name for
+   variable VAR.  If VAR is NULL, first allocate it.  Insert the
+   statement prior to GSI's current position, and return the fresh
+   SSA name.  */
+
+static tree
+build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
+                      tree *var, location_t loc)
+{
+  gimple call_stmt;
+  tree ssa_target;
+
+  if (!*var)
+    {
+      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
+      add_referenced_var (*var);
+    }
+
+  call_stmt = gimple_build_call (fn, 1, arg);
+  ssa_target = make_ssa_name (*var, NULL);
+  gimple_set_lhs (call_stmt, ssa_target);
+  gimple_set_location (call_stmt, loc);
+  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
+
+  return ssa_target;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1033,16 +1063,21 @@ static tree
 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, 
                           tree arg0, tree arg1)
 {
-  REAL_VALUE_TYPE c, cint;
+  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
   HOST_WIDE_INT n;
+  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
+  tree target = NULL_TREE;
+  enum machine_mode mode;
+  bool hw_sqrt_exists;
+  gimple mult_stmt;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
   if (TREE_CODE (arg1) != REAL_CST)
     return NULL_TREE;
 
-  /* If the exponent is equivalent to an integer, expand it into
-     multiplies when profitable.  */
+  /* If the exponent is equivalent to an integer, expand to an optimal
+     multiplication sequence when profitable.  */
   c = TREE_REAL_CST (arg1);
   n = real_to_integer (&c);
   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
@@ -1054,6 +1089,97 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
              && powi_cost (n) <= POWI_MAX_MULTS)))
     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
 
+  /* Attempt various optimizations using sqrt and cbrt.  */
+  type = TREE_TYPE (arg0);
+  mode = TYPE_MODE (type);
+  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
+
+  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
+     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
+     sqrt(-0) = -0.  */
+  if (sqrtfn
+      && REAL_VALUES_EQUAL (c, dconsthalf)
+      && (flag_unsafe_math_optimizations
+         || !HONOR_SIGNED_ZEROS (mode)))
+    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
+     a builtin sqrt instruction is smaller than a call to pow with 0.25,
+     so do this optimization even if -Os.  Don't do this optimization
+     if we don't have a hardware sqrt insn.  */
+  dconst1_4 = dconst1;
+  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
+  hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && REAL_VALUES_EQUAL (c, dconst1_4)
+      && hw_sqrt_exists)
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* sqrt(sqrt(x))  */
+      return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
+    }
+      
+  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
+     optimizing for space.  Don't do this optimization if we don't have
+     a hardware sqrt insn.  */
+  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
+  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && optimize_function_for_speed_p (cfun)
+      && REAL_VALUES_EQUAL (c, dconst3_4)
+      && hw_sqrt_exists)
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* sqrt(sqrt(x))  */
+      sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
+
+      /* sqrt(x) * sqrt(sqrt(x))  */
+      ssa_target = make_ssa_name (target, NULL);
+      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
+                                               sqrt_arg0, sqrt_sqrt);
+      gimple_set_location (mult_stmt, loc);
+      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
+
+      return ssa_target;
+    }
+
+  /* Optimize pow(x,1./3.) = cbrt(x).  This is always safe.  */
+  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
+  dconst1_3 = real_value_truncate (mode, dconst_third ());
+
+  if (cbrtfn
+      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
+      && REAL_VALUES_EQUAL (c, dconst1_3))
+    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+  
+  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
+     if we don't have a hardware sqrt insn.  */
+  dconst1_6 = dconst1_3;
+  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && cbrtfn
+      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
+      && optimize_function_for_speed_p (cfun)
+      && hw_sqrt_exists
+      && REAL_VALUES_EQUAL (c, dconst1_6))
+    {
+      /* sqrt(x)  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* cbrt(sqrt(x))  */
+      return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
+    }
+
   return NULL_TREE;
 }
 


Reply via email to