On Wed, Jan 22, 2014 at 2:23 PM, Wei Mi <w...@google.com> wrote:
> This patch handles the mem access builtins in ivopt. The original
> problem described here:
> http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02648.html
>
> Bootstrapped and passed regression test. Performance test ok for
> plain, fdo and lipo. Ok for google 4.8 branch?
>
> Thanks,
> Wei.
>
> --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/expr.c
> 2013-12-21 21:16:11.006695060 -0800
> +++ gcc/expr.c  2013-12-09 11:16:22.698063077 -0800
> @@ -7537,7 +7537,21 @@
>           tem = fold_build_pointer_plus (tem, TREE_OPERAND (exp, 1));
>         return expand_expr (tem, target, tmode, modifier);
>        }
> +    case TARGET_MEM_REF:
> +      {

Add more comments here to describe the new code.

> +       struct mem_address addr;
> +       int old_cse_not_expected;
> +       addr_space_t as
> +         = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0))));
>
> +       get_address_description (exp, &addr);
> +       result = addr_for_mem_ref (&addr, as, true);
> +       old_cse_not_expected = cse_not_expected;
> +       cse_not_expected = 1;
> +       result = memory_address_addr_space (tmode, result, as);
> +       cse_not_expected = old_cse_not_expected;
> +       return result;
> +      }
>      case CONST_DECL:
>        /* Expand the initializer like constants above.  */
>        result = XEXP (expand_expr_constant (DECL_INITIAL (exp),
> @@ -9579,10 +9593,14 @@
>         struct mem_address addr;
>         enum insn_code icode;
>         unsigned int align;
> +       int old_cse_not_expected;
>
>         get_address_description (exp, &addr);
>         op0 = addr_for_mem_ref (&addr, as, true);
> +       old_cse_not_expected = cse_not_expected;

Comments on the change.

> +       cse_not_expected = 1;
>         op0 = memory_address_addr_space (mode, op0, as);
> +       cse_not_expected = old_cse_not_expected;
>         temp = gen_rtx_MEM (mode, op0);
>         set_mem_attributes (temp, exp, 0);
>         set_mem_addr_space (temp, as);
> --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/target.def
>     2013-12-21 21:16:10.926694446 -0800
> +++ gcc/target.def      2013-12-09 10:46:07.794472365 -0800
> @@ -1119,6 +1119,14 @@
>  #undef HOOK_PREFIX
>  #define HOOK_PREFIX "TARGET_"
>
> +DEFHOOK
> +(builtin_has_mem_ref_p,
> + "This hook return whether the @var{i}th param of the 
> @var{builtin_function}\n\
> +is a memory reference. If @var{i} is -1, return whether the
> @var{builtin_function}\n\
> +contains any memory reference type param.",
> + bool, (int builtin_function, int i),
> + default_builtin_has_mem_ref_p)
> +
>  /* Allow target specific overriding of option settings after options have
>    been changed by an attribute or pragma or when it is reset at the
>    end of the code affected by an attribute or pragma.  */
> --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/gimple.c
>       2013-12-21 21:16:11.066695519 -0800
> +++ gcc/gimple.c        2013-12-09 10:50:09.096289179 -0800
> @@ -2574,7 +2574,8 @@
>  is_gimple_addressable (tree t)
>  {
>    return (is_gimple_id (t) || handled_component_p (t)
> -         || TREE_CODE (t) == MEM_REF);
> +         || TREE_CODE (t) == MEM_REF
> +         || TREE_CODE (t) == TARGET_MEM_REF);
>  }
>
>  /* Return true if T is a valid gimple constant.  */
> @@ -2625,7 +2626,8 @@
>        op = TREE_OPERAND (op, 0);
>      }
>
> -  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF)
> +  if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF
> +      || TREE_CODE (op) == TARGET_MEM_REF)
>      return true;
>
>    switch (TREE_CODE (op))
> --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/targhooks.c
>    2013-12-21 21:16:10.926694446 -0800
> +++ gcc/targhooks.c     2013-12-09 10:45:50.154339207 -0800
> @@ -543,6 +543,13 @@
>  }
>
>  bool
> +default_builtin_has_mem_ref_p (int built_in_function ATTRIBUTE_UNUSED,
> +                              int i ATTRIBUTE_UNUSED)
> +{
> +  return false;
> +}
> +
> +bool
>  hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
>         cumulative_args_t ca ATTRIBUTE_UNUSED,
>         enum machine_mode mode ATTRIBUTE_UNUSED,
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/tree-ssa-loop-ivopts.c
> 2014-01-22 11:57:10.420073494 -0800
> +++ gcc/tree-ssa-loop-ivopts.c  2014-01-22 14:00:14.177908568 -0800
> @@ -96,7 +96,7 @@
>  /* The infinite cost.  */
>  #define INFTY 10000000
>
> -#define AVG_LOOP_NITER(LOOP) 5
> +#define AVG_LOOP_NITER(LOOP) 50

Need a parameter to control. This should also go in as an independent patch.


David

>
>  /* Returns the expected number of loop iterations for LOOP.
>     The average trip count is computed from profile data if it
> @@ -1785,7 +1785,8 @@
>
>        /* Check that the base expression is addressable.  This needs
>          to be done after substituting bases of IVs into it.  */
> -      if (may_be_nonaddressable_p (base))
> +      if (may_be_nonaddressable_p (base)
> +         && REFERENCE_CLASS_P (base))
>         goto fail;
>
>        /* Moreover, on strict alignment platforms, check that it is
> @@ -1793,7 +1794,11 @@
>        if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
>         goto fail;
>
> -      base = build_fold_addr_expr (base);
> +      /* If base is of reference class, build its addr expr here. If
> +        base is already an address, then don't need to build addr
> +        expr.  */
> +      if (REFERENCE_CLASS_P (base))
> +       base = build_fold_addr_expr (base);
>
>        /* Substituting bases of IVs into the base expression might
>          have caused folding opportunities.  */
> @@ -1837,13 +1842,36 @@
>      }
>  }
>
> +/* Find whether the Ith param of the BUILTIN is a mem
> +   reference. If I is -1, it returns whether the BUILTIN
> +   contains any mem reference type param.  */
> +
> +static bool
> +builtin_has_mem_ref_p (gimple builtin, int i)
> +{
> +  tree fndecl = gimple_call_fndecl (builtin);
> +  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
> +    {
> +      switch (DECL_FUNCTION_CODE (fndecl))
> +       {
> +       case BUILT_IN_PREFETCH:
> +         if (i == -1 || i == 0)
> +           return true;
> +       }
> +    }
> +  else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
> +    return targetm.builtin_has_mem_ref_p ((int) DECL_FUNCTION_CODE
> (fndecl), i);
> +
> +  return false;
> +}
> +
>  /* Finds interesting uses of induction variables in the statement STMT.  */
>
>  static void
>  find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
>  {
>    struct iv *iv;
> -  tree op, *lhs, *rhs;
> +  tree op, *lhs, *rhs, callee;
>    ssa_op_iter iter;
>    use_operand_p use_p;
>    enum tree_code code;
> @@ -1891,14 +1919,36 @@
>           find_interesting_uses_cond (data, stmt);
>           return;
>         }
> +    }
> +  /* Handle builtin call which could be expanded to insn
> +     with memory access operands. Generate USE_ADDRESS type
> +     use for address expr which is used for memory access of
> +     such builtin.  */
> +  else if (is_gimple_call (stmt)
> +          && (callee = gimple_call_fndecl (stmt))
> +          && is_builtin_fn (callee)
> +          && builtin_has_mem_ref_p (stmt, -1))
> +    {
> +      size_t i;
> +      for (i = 0; i < gimple_call_num_args (stmt); i++)
> +       {
> +         if (builtin_has_mem_ref_p (stmt, i))
> +           {
> +             tree *arg = gimple_call_arg_ptr (stmt, i);
>
> -      /* TODO -- we should also handle address uses of type
> -
> -        memory = call (whatever);
> -
> -        and
> +             if (TREE_CODE (*arg) != SSA_NAME)
> +               continue;
>
> -        call (memory).  */
> +             gcc_assert (POINTER_TYPE_P (TREE_TYPE (*arg)));
> +             find_interesting_uses_address (data, stmt, arg);
> +           }
> +         else
> +           {
> +             tree arg = gimple_call_arg (stmt, i);
> +             find_interesting_uses_op (data, arg);
> +           }
> +       }
> +      return;
>      }
>
>    if (gimple_code (stmt) == GIMPLE_PHI
> @@ -3845,9 +3895,12 @@
>  {
>    aff_tree ubase_aff, cbase_aff;
>    tree expr, ub, cb;
> +  unsigned HOST_WIDE_INT uoffset, coffset;
>
>    STRIP_NOPS (ubase);
>    STRIP_NOPS (cbase);
> +  ubase = strip_offset (ubase, &uoffset);
> +  cbase = strip_offset (cbase, &coffset);
>    ub = ubase;
>    cb = cbase;
>
> @@ -6282,7 +6335,7 @@
>    aff_tree aff;
>    gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
>    tree base_hint = NULL_TREE;
> -  tree ref, iv;
> +  tree ref, iv, callee;
>    bool ok;
>
>    adjust_iv_update_pos (cand, use);
> @@ -6305,10 +6358,41 @@
>      base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
>
>    iv = var_at_stmt (data->current_loop, cand, use->stmt);
> -  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
> -                       reference_alias_ptr_type (*use->op_p),
> -                       iv, base_hint, data->speed);
> -  copy_ref_info (ref, *use->op_p);
> +
> +  /*  For builtin_call(addr_expr), change it to:
> +       tmp = ADDR_EXPR(TMR(...));
> +       call (tmp);  */
> +  if (is_gimple_call (use->stmt)
> +      && (callee = gimple_call_fndecl (use->stmt))
> +      && is_builtin_fn (callee))
> +    {
> +      gimple g;
> +      gimple_seq seq = NULL;
> +      tree addr;
> +      tree type = TREE_TYPE (TREE_TYPE (*use->op_p));
> +      location_t loc = gimple_location (use->stmt);
> +      gimple_stmt_iterator gsi = gsi_for_stmt (use->stmt);
> +
> +      ref = create_mem_ref (&bsi, type, &aff,
> +                           TREE_TYPE (*use->op_p),
> +                           iv, base_hint, data->speed);
> +      addr = build1 (ADDR_EXPR, TREE_TYPE (*use->op_p), ref);
> +      g = gimple_build_assign_with_ops (ADDR_EXPR,
> +                               make_ssa_name (TREE_TYPE (*use->op_p), NULL),
> +                               addr, NULL);
> +      gimple_set_location (g, loc);
> +      gimple_seq_add_stmt_without_update (&seq, g);
> +      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
> +
> +      ref = gimple_assign_lhs (g);
> +    }
> +  else
> +    {
> +      ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
> +                           reference_alias_ptr_type (*use->op_p),
> +                           iv, base_hint, data->speed);
> +      copy_ref_info (ref, *use->op_p);
> +    }
>    *use->op_p = ref;
>  }
>
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/config/i386/i386.c
>     2013-12-21 21:15:57.326590210 -0800
> +++ gcc/config/i386/i386.c      2013-12-09 10:46:07.774472216 -0800
> @@ -29204,6 +29204,50 @@
>      }
>  }
>
> +/* Return whether the Ith param of the BUILTIN_FUNCTION
> +   is a memory reference. If I == -1, return whether the
> +   BUILTIN_FUNCTION contains any memory reference param.  */
> +
> +static bool
> +ix86_builtin_has_mem_ref_p (int builtin_function, int i)
> +{
> +  switch ((enum ix86_builtins) builtin_function)
> +    {
> +    /* LOAD.  */
> +    case IX86_BUILTIN_LOADHPS:
> +    case IX86_BUILTIN_LOADLPS:
> +    case IX86_BUILTIN_LOADHPD:
> +    case IX86_BUILTIN_LOADLPD:
> +      if (i == -1 || i == 1)
> +       return true;
> +      break;
> +    case IX86_BUILTIN_LOADUPS:
> +    case IX86_BUILTIN_LOADUPD:
> +    case IX86_BUILTIN_LOADDQU:
> +    case IX86_BUILTIN_LOADUPD256:
> +    case IX86_BUILTIN_LOADUPS256:
> +    case IX86_BUILTIN_LOADDQU256:
> +    case IX86_BUILTIN_LDDQU256:
> +      if (i == -1 || i == 0)
> +       return true;
> +      break;
> +    /* STORE.  */
> +    case IX86_BUILTIN_STOREHPS:
> +    case IX86_BUILTIN_STORELPS:
> +    case IX86_BUILTIN_STOREUPS:
> +    case IX86_BUILTIN_STOREUPD:
> +    case IX86_BUILTIN_STOREDQU:
> +    case IX86_BUILTIN_STOREUPD256:
> +    case IX86_BUILTIN_STOREUPS256:
> +    case IX86_BUILTIN_STOREDQU256:
> +      if (i == -1 || i == 0)
> +       return true;
> +    default:
> +      break;
> +    }
> +  return false;
> +}
> +
>  /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
>     to return a pointer to VERSION_DECL if the outcome of the expression
>     formed by PREDICATE_CHAIN is true.  This function will be called during
> @@ -32080,7 +32124,13 @@
>           if (i == memory)
>             {
>               /* This must be the memory operand.  */
> -             op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
> +
> +             /* We expect the builtin could be expanded to rtl with memory
> +                operand and proper addressing mode will be kept as specified
> +                in TARGET_MEM_REF.  */
> +             if (!(TREE_CODE (arg) == ADDR_EXPR
> +                   && TREE_CODE (TREE_OPERAND (arg, 0)) == TARGET_MEM_REF))
> +               op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
>               op = gen_rtx_MEM (mode, op);
>               gcc_assert (GET_MODE (op) == mode
>                           || GET_MODE (op) == VOIDmode);
> @@ -43035,6 +43085,9 @@
>  #undef TARGET_ASM_FUNCTION_PROLOGUE
>  #define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue
>
> +#undef TARGET_BUILTIN_HAS_MEM_REF_P
> +#define TARGET_BUILTIN_HAS_MEM_REF_P ix86_builtin_has_mem_ref_p
> +
>  #undef TARGET_ASM_FUNCTION_EPILOGUE
>  #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
>
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi.in
> 2013-12-21 21:08:29.623160737 -0800
> +++ gcc/doc/tm.texi.in  2013-12-09 10:45:50.124338984 -0800
> @@ -696,6 +696,8 @@
>
>  @hook TARGET_CHECK_STRING_OBJECT_FORMAT_ARG
>
> +@hook TARGET_BUILTIN_HAS_MEM_REF_P
> +
>  @hook TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
>  This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE}
>  but is called when the optimize level is changed via an attribute or
> --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi
>    2013-12-21 21:08:29.613160659 -0800
> +++ gcc/doc/tm.texi     2013-12-09 10:45:50.144339134 -0800
> @@ -708,6 +708,12 @@
>  If a target implements string objects then this hook should should
> provide a facility to check the function arguments in @var{args_list}
> against the format specifiers in @var{format_arg} where the type of
> @var{format_arg} is one recognized as a valid string reference type.
>  @end deftypefn
>
> +@deftypefn {Target Hook} bool TARGET_BUILTIN_HAS_MEM_REF_P (int
> @var{builtin_function}, int @var{i})
> +This hook return whether the @var{i}th param of the @var{builtin_function}
> +is a memory reference. If @var{i} is -1, return whether the
> @var{builtin_function}
> +contains any memory reference type param.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} void TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE (void)
>  This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE}
>  but is called when the optimize level is changed via an attribute or
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c
>       2013-12-21 21:12:54.275187550 -0800
> +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c        2014-01-22
> 11:12:44.108736315 -0800
> @@ -20,5 +20,5 @@
>    return s;
>  }
>
> -/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
> +/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */
>  /* { dg-final { cleanup-tree-dump "ivopts" } } */
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
>       2013-12-21 21:12:54.305187781 -0800
> +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c        2014-01-21
> 23:01:16.220074850 -0800
> @@ -21,5 +21,5 @@
>    return s;
>  }
>
> -/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
> +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
>  /* { dg-final { cleanup-tree-dump "ivopts" } } */
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c
>    2014-01-22 14:03:29.569415050 -0800
> +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c     2014-01-22
> 11:05:43.715445996 -0800
> @@ -0,0 +1,21 @@
> +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
> +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
> +
> +/* Make sure only one iv is selected after IVOPT.  */
> +
> +#include <x86intrin.h>
> +extern __m128i arr[], d[];
> +void test (void)
> +{
> +    unsigned int b;
> +    for (b = 0; b < 1000; b += 2) {
> +      __m128i *p = (__m128i *)(&d[b]);
> +      __m128i a = _mm_load_si128(&arr[4*b+3]);
> +      __m128i v = _mm_loadu_si128(p);
> +      v = _mm_xor_si128(v, a);
> +      _mm_storeu_si128(p, v);
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
> +/* { dg-final { cleanup-tree-dump "ivopts" } } */
> --- 
> /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/loop-28.c
>    2013-12-21 21:12:54.305187781 -0800
> +++ gcc/testsuite/gcc.dg/tree-ssa/loop-28.c     2014-01-22
> 11:11:31.388167897 -0800
> @@ -14,7 +14,13 @@
>  /* There should be 64 MEMs in the unrolled loop and one more in the
> copy of the loop
>     for the rest of the iterations.  */
>
> -/* { dg-final { scan-tree-dump-times "MEM" 65 "optimized" } } */
> +/* We generate prefetch(addr) as:
> +   tmp = &MEM(addr);
> +   prefetch(tmp);
> +   to let ivopt understand the memory access mode better, so there is
> another MEM in
> +   unrolled loop.  */
> +
> +/* { dg-final { scan-tree-dump-times "MEM" 66 "optimized" } } */
>
>  /* There should be no i_a = i_b assignments.  */
>  /* { dg-final { scan-tree-dump-times "i_.*= i_\[0-9\]*;" 0 "aprefetch" } } */

Reply via email to