On Mon, May 23, 2011 at 11:53 PM, William J. Schmidt
<[email protected]> wrote:
> Richard,
>
> While working on the next patch, I ran into a scenario that will apply
> to this one as well. If the call statement that calls powi contains
> vdef/vuse information, it is lost by this replacement. For example,
>
> # .MEM_20 = VDEF <.MEM_19(D)>
> D.1980_3 = __builtin_powf (D.1979_2, 2.0e=0);
>
> is replaced by
>
> powmult.2_27 = D.1979_2 * D.1979_2;
> D.1980_3 = powmult.2_27;
>
> According to my limited understanding, vuse/vdef ops can't be attached
> to a gimple statement that doesn't have memory operands. Do I need to
> find the # VUSE <.MEM_20> reached by this VDEF and change it to a
> # VUSE <.MEM_19>, in this case?
>
> Any pointers to code for similar situations would be appreciated.
You can do unlink_stmt_vdef (stmt) on the old stmt, that will get rid
of the virtual operands. I see gsi_replace doesn't do that, but it
probably should. Can you try
Index: gcc/gimple-iterator.c
===================================================================
--- gcc/gimple-iterator.c (revision 174106)
+++ gcc/gimple-iterator.c (working copy)
@@ -394,6 +394,7 @@ void
gsi_replace (gimple_stmt_iterator *gsi, gimple stmt, bool update_eh_info)
{
gimple orig_stmt = gsi_stmt (*gsi);
+ tree vop;
if (stmt == orig_stmt)
return;
@@ -409,6 +410,13 @@ gsi_replace (gimple_stmt_iterator *gsi,
if (update_eh_info)
maybe_clean_or_replace_eh_stmt (orig_stmt, stmt);
+ /* Preserve virtual operands from the original statement, they will
+ be dropped by update_stmt if they are not necessary. */
+ if ((vop = gimple_vdef (orig_stmt)) != NULL_TREE)
+ gimple_set_vdef (stmt, vop);
+ if ((vop = gimple_vuse (orig_stmt)) != NULL_TREE)
+ gimple_set_vuse (stmt, vop);
+
gimple_duplicate_stmt_histograms (cfun, stmt, cfun, orig_stmt);
/* Free all the data flow information for ORIG_STMT. */
?
Richard.
> Thanks,
> Bill
>
> -------- Forwarded Message --------
>> From: William J. Schmidt <[email protected]>
>> To: Richard Guenther <[email protected]>
>> Cc: [email protected]
>> Subject: Re: [PATCH] Add powi-to-multiply expansion to cse_sincos pass
>> Date: Mon, 23 May 2011 13:06:31 -0500
>>
>> Richard, thanks for the excellent comments. I really appreciate your
>> help. I've implemented them all, and bootstrap/regtest succeeds on
>> powerpc target. Here's the revised patch for your consideration.
>>
>> Thanks,
>> Bill
>>
>>
>> 2011-05-23 Bill Schmidt <[email protected]>
>>
>> * tree-ssa-math-opts.c (powi_table): New.
>> (powi_lookup_cost): New.
>> (powi_cost): New.
>> (powi_as_mults_1): New.
>> (powi_as_mults): New.
>> (gimple_expand_builtin_powi): New.
>> (execute_cse_sincos): Add switch case for BUILT_IN_POWI.
>> (gate_cse_sincos): Remove sincos/cexp restriction.
>>
>> Index: gcc/ChangeLog
>> ===================================================================
>> --- gcc/ChangeLog (revision 174075)
>> +++ gcc/ChangeLog (working copy)
>> @@ -1,3 +1,14 @@
>> +2011-05-23 Bill Schmidt <[email protected]>
>> +
>> + * tree-ssa-math-opts.c (powi_table): New.
>> + (powi_lookup_cost): New.
>> + (powi_cost): New.
>> + (powi_as_mults_1): New.
>> + (powi_as_mults): New.
>> + (gimple_expand_builtin_powi): New.
>> + (execute_cse_sincos): Add switch case for BUILT_IN_POWI.
>> + (gate_cse_sincos): Remove sincos/cexp restriction.
>> +
>> 2011-05-23 Richard Guenther <[email protected]>
>>
>> * gimple.c (gimple_types_compatible_p_1): Always compare type names.
>> Index: gcc/tree-ssa-math-opts.c
>> ===================================================================
>> --- gcc/tree-ssa-math-opts.c (revision 174075)
>> +++ gcc/tree-ssa-math-opts.c (working copy)
>> @@ -795,8 +795,243 @@ execute_cse_sincos_1 (tree name)
>> return cfg_changed;
>> }
>>
>> +/* To evaluate powi(x,n), the floating point value x raised to the
>> + constant integer exponent n, we use a hybrid algorithm that
>> + combines the "window method" with look-up tables. For an
>> + introduction to exponentiation algorithms and "addition chains",
>> + see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
>> + "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
>> + 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
>> + Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
>> +
>> +/* Provide a default value for POWI_MAX_MULTS, the maximum number of
>> + multiplications to inline before calling the system library's pow
>> + function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
>> + so this default never requires calling pow, powf or powl. */
>> +
>> +#ifndef POWI_MAX_MULTS
>> +#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
>> +#endif
>> +
>> +/* The size of the "optimal power tree" lookup table. All
>> + exponents less than this value are simply looked up in the
>> + powi_table below. This threshold is also used to size the
>> + cache of pseudo registers that hold intermediate results. */
>> +#define POWI_TABLE_SIZE 256
>> +
>> +/* The size, in bits of the window, used in the "window method"
>> + exponentiation algorithm. This is equivalent to a radix of
>> + (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
>> +#define POWI_WINDOW_SIZE 3
>> +
>> +/* The following table is an efficient representation of an
>> + "optimal power tree". For each value, i, the corresponding
>> + value, j, in the table states than an optimal evaluation
>> + sequence for calculating pow(x,i) can be found by evaluating
>> + pow(x,j)*pow(x,i-j). An optimal power tree for the first
>> + 100 integers is given in Knuth's "Seminumerical algorithms". */
>> +
>> +static const unsigned char powi_table[POWI_TABLE_SIZE] =
>> + {
>> + 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
>> + 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
>> + 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
>> + 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
>> + 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
>> + 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
>> + 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
>> + 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
>> + 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
>> + 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
>> + 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
>> + 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
>> + 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
>> + 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
>> + 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
>> + 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
>> + 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
>> + 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
>> + 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
>> + 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
>> + 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
>> + 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
>> + 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
>> + 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
>> + 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
>> + 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
>> + 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
>> + 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
>> + 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
>> + 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
>> + 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
>> + 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
>> + };
>> +
>> +
>> +/* Return the number of multiplications required to calculate
>> + powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
>> + subroutine of powi_cost. CACHE is an array indicating
>> + which exponents have already been calculated. */
>> +
>> +static int
>> +powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
>> +{
>> + /* If we've already calculated this exponent, then this evaluation
>> + doesn't require any additional multiplications. */
>> + if (cache[n])
>> + return 0;
>> +
>> + cache[n] = true;
>> + return powi_lookup_cost (n - powi_table[n], cache)
>> + + powi_lookup_cost (powi_table[n], cache) + 1;
>> +}
>> +
>> +/* Return the number of multiplications required to calculate
>> + powi(x,n) for an arbitrary x, given the exponent N. This
>> + function needs to be kept in sync with powi_as_mults below. */
>> +
>> +static int
>> +powi_cost (HOST_WIDE_INT n)
>> +{
>> + bool cache[POWI_TABLE_SIZE];
>> + unsigned HOST_WIDE_INT digit;
>> + unsigned HOST_WIDE_INT val;
>> + int result;
>> +
>> + if (n == 0)
>> + return 0;
>> +
>> + /* Ignore the reciprocal when calculating the cost. */
>> + val = (n < 0) ? -n : n;
>> +
>> + /* Initialize the exponent cache. */
>> + memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
>> + cache[1] = true;
>> +
>> + result = 0;
>> +
>> + while (val >= POWI_TABLE_SIZE)
>> + {
>> + if (val & 1)
>> + {
>> + digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
>> + result += powi_lookup_cost (digit, cache)
>> + + POWI_WINDOW_SIZE + 1;
>> + val >>= POWI_WINDOW_SIZE;
>> + }
>> + else
>> + {
>> + val >>= 1;
>> + result++;
>> + }
>> + }
>> +
>> + return result + powi_lookup_cost (val, cache);
>> +}
>> +
>> +/* Recursive subroutine of powi_as_mults. This function takes the
>> + array, CACHE, of already calculated exponents and an exponent N and
>> + returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
>> +
>> +static tree
>> +powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
>> + HOST_WIDE_INT n, tree *cache, tree target)
>> +{
>> + tree op0, op1, ssa_target;
>> + unsigned HOST_WIDE_INT digit;
>> + gimple mult_stmt;
>> +
>> + if (n < POWI_TABLE_SIZE)
>> + {
>> + if (cache[n])
>> + return cache[n];
>> +
>> + ssa_target = make_ssa_name (target, NULL);
>> + cache[n] = ssa_target;
>> +
>> + op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache,
>> target);
>> + op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
>> + }
>> + else if (n & 1)
>> + {
>> + digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
>> + op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
>> + op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
>> + ssa_target = make_ssa_name (target, NULL);
>> + }
>> + else
>> + {
>> + op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
>> + op1 = op0;
>> + ssa_target = make_ssa_name (target, NULL);
>> + }
>> +
>> + mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0,
>> op1);
>> + gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>> +
>> + return ssa_target;
>> +}
>> +
>> +/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
>> + This function needs to be kept in sync with powi_cost above. */
>> +
>> +static tree
>> +powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
>> + tree arg0, HOST_WIDE_INT n)
>> +{
>> + tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
>> + gimple div_stmt;
>> +
>> + if (n == 0)
>> + return build_real (type, dconst1);
>> +
>> + memset (cache, 0, sizeof (cache));
>> + cache[1] = arg0;
>> +
>> + target = create_tmp_var (type, "powmult");
>> + add_referenced_var (target);
>> +
>> + result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache,
>> target);
>> +
>> + if (n >= 0)
>> + return result;
>> +
>> + /* If the original exponent was negative, reciprocate the result. */
>> + target = create_tmp_var (type, "powmult");
>> + add_referenced_var (target);
>> + target = make_ssa_name (target, NULL);
>> +
>> + div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
>> + build_real (type, dconst1),
>> + result);
>> + gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>> +
>> + return target;
>> +}
>> +
>> +/* ARG0 and N are the two arguments to a powi builtin in GSI with
>> + location info LOC. If the arguments are appropriate, create an
>> + equivalent sequence of statements prior to GSI using an optimal
>> + number of multiplications, and return an expession holding the
>> + result. */
>> +
>> +static tree
>> +gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
>> + tree arg0, HOST_WIDE_INT n)
>> +{
>> + /* Avoid largest negative number. */
>> + if (n != -n
>> + && ((n >= -1 && n <= 2)
>> + || (optimize_function_for_speed_p (cfun)
>> + && powi_cost (n) <= POWI_MAX_MULTS)))
>> + return powi_as_mults (gsi, loc, arg0, n);
>> +
>> + return NULL_TREE;
>> +}
>> +
>> /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
>> - on the SSA_NAME argument of each of them. */
>> + on the SSA_NAME argument of each of them. Also expand powi(x,n) into
>> + an optimal number of multiplies, when n is a constant. */
>>
>> static unsigned int
>> execute_cse_sincos (void)
>> @@ -821,7 +1056,9 @@ execute_cse_sincos (void)
>> && (fndecl = gimple_call_fndecl (stmt))
>> && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
>> {
>> - tree arg;
>> + tree arg, arg0, arg1, result;
>> + HOST_WIDE_INT n, n_hi;
>> + location_t loc;
>>
>> switch (DECL_FUNCTION_CODE (fndecl))
>> {
>> @@ -833,6 +1070,29 @@ execute_cse_sincos (void)
>> cfg_changed |= execute_cse_sincos_1 (arg);
>> break;
>>
>> + CASE_FLT_FN (BUILT_IN_POWI):
>> + arg0 = gimple_call_arg (stmt, 0);
>> + arg1 = gimple_call_arg (stmt, 1);
>> + if (!host_integerp (arg1, 0))
>> + break;
>> +
>> + n = TREE_INT_CST_LOW (arg1);
>> + n_hi = TREE_INT_CST_HIGH (arg1);
>> + if (n_hi != 0 && n_hi != -1)
>> + break;
>> +
>> + loc = gimple_location (stmt);
>> + result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
>> +
>> + if (result)
>> + {
>> + tree lhs = gimple_get_lhs (stmt);
>> + gimple new_stmt = gimple_build_assign (lhs, result);
>> + gimple_set_location (new_stmt, loc);
>> + gsi_replace (&gsi, new_stmt, true);
>> + }
>> + break;
>> +
>> default:;
>> }
>> }
>> @@ -849,10 +1109,9 @@ execute_cse_sincos (void)
>> static bool
>> gate_cse_sincos (void)
>> {
>> - /* Make sure we have either sincos or cexp. */
>> - return (TARGET_HAS_SINCOS
>> - || TARGET_C99_FUNCTIONS)
>> - && optimize;
>> + /* We no longer require either sincos or cexp, since powi expansion
>> + piggybacks on this pass. */
>> + return optimize;
>> }
>>
>> struct gimple_opt_pass pass_cse_sincos =
>>
>>
>
>