Thanks for doing this.

kugan.vivekanandara...@linaro.org writes:
> From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org>
>
> gcc/ChangeLog:
>
> 2019-05-15  Kugan Vivekanandarajah  <kugan.vivekanandara...@linaro.org>
>
>       PR target/88834
>       * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
>       IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
>       (find_interesting_uses_stmt): Likewise.
>       (get_alias_ptr_type_for_ptr_address): Likewise.
>       (add_iv_candidate_for_use): Add scaled index candidate if useful.
>
> Change-Id: I8e8151fe2dde2845dedf38b090103694da6fc9d1
> ---
>  gcc/tree-ssa-loop-ivopts.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 9864b59..115a70c 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -2451,11 +2451,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
>    switch (gimple_call_internal_fn (call))
>      {
>      case IFN_MASK_LOAD:
> +    case IFN_MASK_LOAD_LANES:
>        if (op_p == gimple_call_arg_ptr (call, 0))
>       return TREE_TYPE (gimple_call_lhs (call));
>        return NULL_TREE;
>  
>      case IFN_MASK_STORE:
> +    case IFN_MASK_STORE_LANES:
>        if (op_p == gimple_call_arg_ptr (call, 0))
>       return TREE_TYPE (gimple_call_arg (call, 3));
>        return NULL_TREE;
> @@ -2545,7 +2547,7 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
>         return;
>       }
>  
> -      /* TODO -- we should also handle address uses of type
> +      /* TODO -- we should also handle all address uses of type
>  
>        memory = call (whatever);
>  
> @@ -2553,6 +2555,27 @@ find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
>  
>        call (memory).  */
>      }
> +  else if (is_gimple_call (stmt))
> +    {
> +      gcall *call = dyn_cast <gcall *> (stmt);
> +      if (call
> +       && gimple_call_internal_p (call)
> +       && (gimple_call_internal_fn (call) == IFN_MASK_LOAD_LANES
> +           || gimple_call_internal_fn (call) == IFN_MASK_STORE_LANES))
> +     {
> +       tree *arg = gimple_call_arg_ptr (call, 0);
> +       struct iv *civ = get_iv (data, *arg);
> +       tree mem_type = get_mem_type_for_internal_fn (call, arg);
> +       if (civ && mem_type)
> +         {
> +           civ = alloc_iv (data, civ->base, civ->step);
> +           record_group_use (data, arg, civ, stmt, USE_PTR_ADDRESS,
> +                             mem_type);
> +           return;
> +         }
> +     }
> +    }
> +

Why do you need to handle this specially?  Does:

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      if (!find_address_like_use (data, stmt, use_p->use, iv))
        find_interesting_uses_op (data, op);
    }

not do the right thing for the load/store lane case?
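For reference, that loop ends up in find_address_like_use, which already
routes pointer arguments of internal-function calls through
get_mem_type_for_internal_fn, roughly:

  static bool
  find_address_like_use (struct ivopts_data *data, gimple *stmt,
                         tree *use_p, struct iv *iv)
  {
    /* Fail if the base object of this memory reference is unknown.  */
    if (iv->base_object == NULL_TREE)
      return false;

    tree mem_type = NULL_TREE;
    if (gcall *call = dyn_cast <gcall *> (stmt))
      if (gimple_call_internal_p (call))
        mem_type = get_mem_type_for_internal_fn (call, use_p);
    if (mem_type)
      {
        iv = alloc_iv (data, iv->base, iv->step);
        record_group_use (data, use_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
        return true;
      }
    return false;
  }

so once get_mem_type_for_internal_fn handles the LANES variants, the
generic loop should record the group use for you, without the special
case above.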

> @@ -3500,6 +3523,39 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
>      basetype = sizetype;
>    record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
>  
> +  /* Compare the cost of an address with an unscaled index with the cost of
> +    an address with a scaled index and add candidate if useful. */
> +  if (use != NULL && use->type == USE_PTR_ADDRESS)

I think we want this for all address uses.  E.g. for SVE, masked and
unmasked accesses would both benefit.
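E.g. (sketch, assuming the address_p helper that accepts both
USE_REF_ADDRESS and USE_PTR_ADDRESS):

  if (use != NULL && address_p (use->type))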

> +    {
> +      struct mem_address parts = {NULL_TREE, integer_one_node,
> +                               NULL_TREE, NULL_TREE, NULL_TREE};

Might be better to use "= {}" and initialise the fields that matter by
assignment.  As it stands this uses integer_one_node as the base, but I
couldn't tell if that was deliberate.
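I.e. (minimal sketch):

  struct mem_address parts = {};
  parts.index = integer_one_node;

which leaves parts.base null unless the 1 really was meant to be
the base.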

> +      poly_uint64 temp;
> +      poly_int64 fact;
> +      bool speed = optimize_loop_for_speed_p (data->current_loop);
> +      poly_int64 poly_step = tree_to_poly_int64 (iv->step);

The step could be variable, in which case tree_to_poly_int64 isn't
safe here; we should instead check whether the step is a compile-time
constant using poly_int_tree_p.
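E.g. (sketch):

  poly_int64 poly_step;
  if (use != NULL
      && use->type == USE_PTR_ADDRESS
      && poly_int_tree_p (iv->step, &poly_step))
    {
      ...
    }

so that we simply don't add the candidate when the step isn't a
compile-time (poly) constant.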

> +      machine_mode mem_mode = TYPE_MODE (use->mem_type);
> +      addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
> +
> +      fact = GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (use->mem_type)));

This is simpler as:

  GET_MODE_UNIT_SIZE (TYPE_MODE (use->mem_type));

It's always a compile-time constant, so "fact" can be int/unsigned int
rather than poly_int64.

> +      parts.index = integer_one_node;
> +
> +      if (fact.is_constant ()
> +       && can_div_trunc_p (poly_step, fact, &temp))

I think it only makes sense to add the candidate if poly_step is an exact
multiple of fact, so I think we should use multiple_p here.
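Something like:

  poly_int64 quotient;
  if (multiple_p (poly_step, fact, &quotient))
    ...

which also gives the quotient needed for the step below.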

> +     {
> +       /* Addressing mode "base + index".  */
> +       rtx addr = addr_for_mem_ref (&parts, as, false);
> +       unsigned cost = address_cost (addr, mem_mode, as, speed);
> +       tree step = wide_int_to_tree (sizetype,
> +                                     exact_div (poly_step, fact));

The multiple_p mentioned above would provide this result too.
We only need to calculate "step" if we decided to add the candidate,
so I think it should be in the "if" below.
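I.e. (sketch, with "quotient" coming from the multiple_p call above):

  if (new_cost < cost)
    {
      tree step = wide_int_to_tree (sizetype, quotient);
      ...
    }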

> +       parts.step = wide_int_to_tree (sizetype, fact);
> +       /* Addressing mode "base + index << scale".  */
> +       addr = addr_for_mem_ref (&parts, as, false);
> +       unsigned new_cost = address_cost (addr, mem_mode, as, speed);
> +       if (new_cost < cost)

I think it'd be worth splitting the guts of this check out into a helper,
since it's something that could be reusable.  Maybe:

  unsigned int preferred_mem_scale_factor (machine_mode);

with the only supported values for now being 1 and GET_MODE_UNIT_SIZE.
(Could be extended later if a target needs it.)
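For concreteness, one possible shape for the helper, reusing the cost
comparison from the patch (just a sketch; it probably also needs the
base, for the address space, and a speed flag):

  /* Return the preferred index scale factor for accessing memory of
     mode MEM_MODE in the address space of pointer BASE.  Assume we're
     optimizing for speed if SPEED is true and for size otherwise.  */

  unsigned int
  preferred_mem_scale_factor (tree base, machine_mode mem_mode, bool speed)
  {
    struct mem_address parts = {};
    addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
    unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);

    /* Addressing mode "base + index".  */
    parts.base = integer_one_node;
    parts.index = integer_one_node;
    rtx addr = addr_for_mem_ref (&parts, as, false);
    unsigned int cost = address_cost (addr, mem_mode, as, speed);

    /* Addressing mode "base + index << scale".  */
    parts.step = wide_int_to_tree (sizetype, fact);
    addr = addr_for_mem_ref (&parts, as, false);
    unsigned int new_cost = address_cost (addr, mem_mode, as, speed);

    return new_cost < cost ? fact : 1;
  }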

Thanks,
Richard
