Hi Richard,
On 05/25/2011 03:44 PM, Richard Sandiford wrote:
> Sorry for being so late. I was just curious...
>
> Tom de Vries <[email protected]> writes:
>> The init cost of an iv will in general not be zero. It will be
>> exceptional that the iv register happens to be initialized with the
>> proper value at no cost. In general, there will at the very least be a
>> regcopy or a const set.
>>
>> 2011-05-05 Tom de Vries <[email protected]>
>>
>> PR target/45098
>> * tree-ssa-loop-ivopts.c (determine_iv_cost): Prevent
>> cost_base.cost == 0.
>> Index: gcc/tree-ssa-loop-ivopts.c
>> ===================================================================
>> --- gcc/tree-ssa-loop-ivopts.c (revision 173380)
>> +++ gcc/tree-ssa-loop-ivopts.c (working copy)
>> @@ -4688,6 +4688,8 @@ determine_iv_cost (struct ivopts_data *d
>>
>> base = cand->iv->base;
>> cost_base = force_var_cost (data, base, NULL);
>> + if (cost_base.cost == 0)
>> + cost_base.cost = COSTS_N_INSNS (1);
>> cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
>>
>> cost = cost_step + adjust_setup_cost (data, cost_base.cost);
>
> ...why does this reasoning apply only to this call to force_var_cost?
>
> Richard
force_var_cost is described as estimating the cost of forcing expression expr
into a variable. If expr is already a var, this definition is ambiguous.
If we can use the var directly, the cost is zero, but if we need a regcopy, it
should be the cost of a regcopy.
What is special for an iv, is that we know that it is not only used but also
modified. If a var is used in or after the loop, we need a regcopy to init the
iv with that var. If that var is not used in or after the loop, we can use that
var as iv. The patch above is a heuristic that estimates that the latter
situation is the less frequent one.
In general, we don't have such specific information, and the the cost of zero is
a good choice then.
We could add a parameter to force_var_cost that indicates this choice, that
would perhaps be a better fix.
As for the reasoning related to the const set, that is something that indeed
holds more general, and could be implemented in force_var_cost, which is what
you're suggesting if I understand you correctly.
The tentative patch below explores these last 2 ideas.
Thanks,
-Tom
diff -u gcc/tree-ssa-loop-ivopts.c (working copy) gcc/tree-ssa-loop-ivopts.c (working copy)
--- gcc/tree-ssa-loop-ivopts.c (working copy)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -3722,16 +3722,31 @@
invariants the computation depends on. */
static comp_cost
-force_var_cost (struct ivopts_data *data,
- tree expr, bitmap *depends_on)
+force_var_cost (struct ivopts_data *data, tree expr, bool var_costs_regcopy,
+ bitmap *depends_on)
{
+ comp_cost cost;
+
if (depends_on)
{
fd_ivopts_data = data;
walk_tree (&expr, find_depends, depends_on, NULL);
}
- return force_expr_to_var_cost (expr, data->speed);
+ STRIP_NOPS (expr);
+ if (SSA_VAR_P (expr))
+ {
+ cost = zero_cost;
+ if (var_costs_regcopy)
+ cost.cost = COSTS_N_INSNS (1);
+ return cost;
+ }
+
+ cost = force_expr_to_var_cost (expr, data->speed);
+ if (cost.cost == 0)
+ cost.cost = COSTS_N_INSNS (1);
+
+ return cost;
}
/* Estimates cost of expressing address ADDR as var + symbol + offset. The
@@ -3817,7 +3832,8 @@
aff_combination_scale (&aff_e2, double_int_minus_one);
aff_combination_add (&aff_e1, &aff_e2);
- return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
+ return force_var_cost (data, aff_combination_to_tree (&aff_e1), false,
+ depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
@@ -3857,11 +3873,11 @@
*var_present = true;
if (integer_zerop (e2))
- return force_var_cost (data, e1, depends_on);
+ return force_var_cost (data, e1, false, depends_on);
if (integer_zerop (e1))
{
- comp_cost cost = force_var_cost (data, e2, depends_on);
+ comp_cost cost = force_var_cost (data, e2, false, depends_on);
cost.cost += multiply_by_cost (-1, mode, data->speed);
return cost;
}
@@ -3872,7 +3888,8 @@
aff_combination_scale (&aff_e2, double_int_minus_one);
aff_combination_add (&aff_e1, &aff_e2);
- return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
+ return force_var_cost (data, aff_combination_to_tree (&aff_e1), false,
+ depends_on);
}
/* Returns true if AFF1 and AFF2 are identical. */
@@ -4162,7 +4179,7 @@
}
else
{
- cost = force_var_cost (data, cbase, depends_on);
+ cost = force_var_cost (data, cbase, false, depends_on);
cost = add_costs (cost,
difference_cost (data,
ubase, build_int_cst (utype, 0),
@@ -4487,22 +4504,18 @@
return true;
}
- /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
- be copied, if is is used in the loop body and DATA->body_includes_call. */
+/* Attempts to determine whether a var EXPR can be used in the loop body
+ of DATA->current_loop, or whether a regcopy is needed. */
-static int
-parm_decl_cost (struct ivopts_data *data, tree bound)
+static bool
+use_in_loop_needs_copy (struct ivopts_data *data, tree expr)
{
- tree sbound = bound;
- STRIP_NOPS (sbound);
-
- if (TREE_CODE (sbound) == SSA_NAME
- && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
- && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
- && data->body_includes_call)
- return COSTS_N_INSNS (1);
+ STRIP_NOPS (expr);
- return 0;
+ return (TREE_CODE (expr) == SSA_NAME
+ && TREE_CODE (SSA_NAME_VAR (expr)) == PARM_DECL
+ && gimple_nop_p (SSA_NAME_DEF_STMT (expr))
+ && data->body_includes_call);
}
/* Determines cost of basing replacement of USE on CAND in a condition. */
@@ -4529,10 +4542,10 @@
/* Try iv elimination. */
if (may_eliminate_iv (data, use, cand, &bound))
{
- elim_cost = force_var_cost (data, bound, &depends_on_elim);
- if (elim_cost.cost == 0)
- elim_cost.cost = parm_decl_cost (data, bound);
- else if (TREE_CODE (bound) == INTEGER_CST)
+ elim_cost = force_var_cost (data, bound,
+ use_in_loop_needs_copy (data, bound),
+ &depends_on_elim);
+ if (TREE_CODE (bound) == INTEGER_CST)
elim_cost.cost = 0;
/* If we replace a loop condition 'i < n' with 'p < base + n',
depends_on_elim will have 'base' and 'n' set, which implies
@@ -4577,10 +4590,9 @@
walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
/* Count the cost of the original bound as well. */
- bound_cost = force_var_cost (data, *bound_cst, NULL);
- if (bound_cost.cost == 0)
- bound_cost.cost = parm_decl_cost (data, *bound_cst);
- else if (TREE_CODE (*bound_cst) == INTEGER_CST)
+ bound_cost = force_var_cost (data, *bound_cst,
+ use_in_loop_needs_copy (data, *bound_cst), NULL);
+ if (TREE_CODE (*bound_cst) == INTEGER_CST)
bound_cost.cost = 0;
express_cost.cost += bound_cost.cost;
@@ -4804,12 +4816,10 @@
that rolls enough, so we take it just very little into account. */
base = cand->iv->base;
- cost_base = force_var_cost (data, base, NULL);
/* It will be exceptional that the iv register happens to be initialized with
the proper value at no cost. In general, there will at least be a regcopy
or a const set. */
- if (cost_base.cost == 0)
- cost_base.cost = COSTS_N_INSNS (1);
+ cost_base = force_var_cost (data, base, true, NULL);
cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + adjust_setup_cost (data, cost_base.cost);