On Wed, Jan 22, 2014 at 2:23 PM, Wei Mi <w...@google.com> wrote: > This patch handles the mem access builtins in ivopt. The original > problem described here: > http://gcc.gnu.org/ml/gcc-patches/2013-11/msg02648.html > > Bootstrapped and passed regression test. Performance test ok for > plain, fdo and lipo. Ok for google 4.8 branch? > > Thanks, > Wei. > > --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/expr.c > 2013-12-21 21:16:11.006695060 -0800 > +++ gcc/expr.c 2013-12-09 11:16:22.698063077 -0800 > @@ -7537,7 +7537,21 @@ > tem = fold_build_pointer_plus (tem, TREE_OPERAND (exp, 1)); > return expand_expr (tem, target, tmode, modifier); > } > + case TARGET_MEM_REF: > + {
Add more comments here to describe the new code. > + struct mem_address addr; > + int old_cse_not_expected; > + addr_space_t as > + = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0)))); > > + get_address_description (exp, &addr); > + result = addr_for_mem_ref (&addr, as, true); > + old_cse_not_expected = cse_not_expected; > + cse_not_expected = 1; > + result = memory_address_addr_space (tmode, result, as); > + cse_not_expected = old_cse_not_expected; > + return result; > + } > case CONST_DECL: > /* Expand the initializer like constants above. */ > result = XEXP (expand_expr_constant (DECL_INITIAL (exp), > @@ -9579,10 +9593,14 @@ > struct mem_address addr; > enum insn_code icode; > unsigned int align; > + int old_cse_not_expected; > > get_address_description (exp, &addr); > op0 = addr_for_mem_ref (&addr, as, true); > + old_cse_not_expected = cse_not_expected; Comments on the change. > + cse_not_expected = 1; > op0 = memory_address_addr_space (mode, op0, as); > + cse_not_expected = old_cse_not_expected; > temp = gen_rtx_MEM (mode, op0); > set_mem_attributes (temp, exp, 0); > set_mem_addr_space (temp, as); > --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/target.def > 2013-12-21 21:16:10.926694446 -0800 > +++ gcc/target.def 2013-12-09 10:46:07.794472365 -0800 > @@ -1119,6 +1119,14 @@ > #undef HOOK_PREFIX > #define HOOK_PREFIX "TARGET_" > > +DEFHOOK > +(builtin_has_mem_ref_p, > + "This hook return whether the @var{i}th param of the > @var{builtin_function}\n\ > +is a memory reference. If @var{i} is -1, return whether the > @var{builtin_function}\n\ > +contains any memory reference type param.", > + bool, (int builtin_function, int i), > + default_builtin_has_mem_ref_p) > + > /* Allow target specific overriding of option settings after options have > been changed by an attribute or pragma or when it is reset at the > end of the code affected by an attribute or pragma. */ > --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/gimple.c > 2013-12-21 21:16:11.066695519 -0800 > +++ gcc/gimple.c 2013-12-09 10:50:09.096289179 -0800 > @@ -2574,7 +2574,8 @@ > is_gimple_addressable (tree t) > { > return (is_gimple_id (t) || handled_component_p (t) > - || TREE_CODE (t) == MEM_REF); > + || TREE_CODE (t) == MEM_REF > + || TREE_CODE (t) == TARGET_MEM_REF); > } > > /* Return true if T is a valid gimple constant. */ > @@ -2625,7 +2626,8 @@ > op = TREE_OPERAND (op, 0); > } > > - if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF) > + if (CONSTANT_CLASS_P (op) || TREE_CODE (op) == MEM_REF > + || TREE_CODE (op) == TARGET_MEM_REF) > return true; > > switch (TREE_CODE (op)) > --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/targhooks.c > 2013-12-21 21:16:10.926694446 -0800 > +++ gcc/targhooks.c 2013-12-09 10:45:50.154339207 -0800 > @@ -543,6 +543,13 @@ > } > > bool > +default_builtin_has_mem_ref_p (int built_in_function ATTRIBUTE_UNUSED, > + int i ATTRIBUTE_UNUSED) > +{ > + return false; > +} > + > +bool > hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false ( > cumulative_args_t ca ATTRIBUTE_UNUSED, > enum machine_mode mode ATTRIBUTE_UNUSED, > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/tree-ssa-loop-ivopts.c > 2014-01-22 11:57:10.420073494 -0800 > +++ gcc/tree-ssa-loop-ivopts.c 2014-01-22 14:00:14.177908568 -0800 > @@ -96,7 +96,7 @@ > /* The infinite cost. */ > #define INFTY 10000000 > > -#define AVG_LOOP_NITER(LOOP) 5 > +#define AVG_LOOP_NITER(LOOP) 50 Need a parameter to control. This should also go in as an independent patch. David > > /* Returns the expected number of loop iterations for LOOP. > The average trip count is computed from profile data if it > @@ -1785,7 +1785,8 @@ > > /* Check that the base expression is addressable. This needs > to be done after substituting bases of IVs into it. */ > - if (may_be_nonaddressable_p (base)) > + if (may_be_nonaddressable_p (base) > + && REFERENCE_CLASS_P (base)) > goto fail; > > /* Moreover, on strict alignment platforms, check that it is > @@ -1793,7 +1794,11 @@ > if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step)) > goto fail; > > - base = build_fold_addr_expr (base); > + /* If base is of reference class, build its addr expr here. If > + base is already an address, then don't need to build addr > + expr. */ > + if (REFERENCE_CLASS_P (base)) > + base = build_fold_addr_expr (base); > > /* Substituting bases of IVs into the base expression might > have caused folding opportunities. */ > @@ -1837,13 +1842,36 @@ > } > } > > +/* Find whether the Ith param of the BUILTIN is a mem > + reference. If I is -1, it returns whether the BUILTIN > + contains any mem reference type param. */ > + > +static bool > +builtin_has_mem_ref_p (gimple builtin, int i) > +{ > + tree fndecl = gimple_call_fndecl (builtin); > + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) > + { > + switch (DECL_FUNCTION_CODE (fndecl)) > + { > + case BUILT_IN_PREFETCH: > + if (i == -1 || i == 0) > + return true; > + } > + } > + else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) > + return targetm.builtin_has_mem_ref_p ((int) DECL_FUNCTION_CODE > (fndecl), i); > + > + return false; > +} > + > /* Finds interesting uses of induction variables in the statement STMT. */ > > static void > find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt) > { > struct iv *iv; > - tree op, *lhs, *rhs; > + tree op, *lhs, *rhs, callee; > ssa_op_iter iter; > use_operand_p use_p; > enum tree_code code; > @@ -1891,14 +1919,36 @@ > find_interesting_uses_cond (data, stmt); > return; > } > + } > + /* Handle builtin call which could be expanded to insn > + with memory access operands. Generate USE_ADDRESS type > + use for address expr which is used for memory access of > + such builtin. */ > + else if (is_gimple_call (stmt) > + && (callee = gimple_call_fndecl (stmt)) > + && is_builtin_fn (callee) > + && builtin_has_mem_ref_p (stmt, -1)) > + { > + size_t i; > + for (i = 0; i < gimple_call_num_args (stmt); i++) > + { > + if (builtin_has_mem_ref_p (stmt, i)) > + { > + tree *arg = gimple_call_arg_ptr (stmt, i); > > - /* TODO -- we should also handle address uses of type > - > - memory = call (whatever); > - > - and > + if (TREE_CODE (*arg) != SSA_NAME) > + continue; > > - call (memory). */ > + gcc_assert (POINTER_TYPE_P (TREE_TYPE (*arg))); > + find_interesting_uses_address (data, stmt, arg); > + } > + else > + { > + tree arg = gimple_call_arg (stmt, i); > + find_interesting_uses_op (data, arg); > + } > + } > + return; > } > > if (gimple_code (stmt) == GIMPLE_PHI > @@ -3845,9 +3895,12 @@ > { > aff_tree ubase_aff, cbase_aff; > tree expr, ub, cb; > + unsigned HOST_WIDE_INT uoffset, coffset; > > STRIP_NOPS (ubase); > STRIP_NOPS (cbase); > + ubase = strip_offset (ubase, &uoffset); > + cbase = strip_offset (cbase, &coffset); > ub = ubase; > cb = cbase; > > @@ -6282,7 +6335,7 @@ > aff_tree aff; > gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt); > tree base_hint = NULL_TREE; > - tree ref, iv; > + tree ref, iv, callee; > bool ok; > > adjust_iv_update_pos (cand, use); > @@ -6305,10 +6358,41 @@ > base_hint = var_at_stmt (data->current_loop, cand, use->stmt); > > iv = var_at_stmt (data->current_loop, cand, use->stmt); > - ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, > - reference_alias_ptr_type (*use->op_p), > - iv, base_hint, data->speed); > - copy_ref_info (ref, *use->op_p); > + > + /* For builtin_call(addr_expr), change it to: > + tmp = ADDR_EXPR(TMR(...)); > + call (tmp); */ > + if (is_gimple_call (use->stmt) > + && (callee = gimple_call_fndecl (use->stmt)) > + && is_builtin_fn (callee)) > + { > + gimple g; > + gimple_seq seq = NULL; > + tree addr; > + tree type = TREE_TYPE (TREE_TYPE (*use->op_p)); > + location_t loc = gimple_location (use->stmt); > + gimple_stmt_iterator gsi = gsi_for_stmt (use->stmt); > + > + ref = create_mem_ref (&bsi, type, &aff, > + TREE_TYPE (*use->op_p), > + iv, base_hint, data->speed); > + addr = build1 (ADDR_EXPR, TREE_TYPE (*use->op_p), ref); > + g = gimple_build_assign_with_ops (ADDR_EXPR, > + make_ssa_name (TREE_TYPE (*use->op_p), NULL), > + addr, NULL); > + gimple_set_location (g, loc); > + gimple_seq_add_stmt_without_update (&seq, g); > + gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); > + > + ref = gimple_assign_lhs (g); > + } > + else > + { > + ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, > + reference_alias_ptr_type (*use->op_p), > + iv, base_hint, data->speed); > + copy_ref_info (ref, *use->op_p); > + } > *use->op_p = ref; > } > > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/config/i386/i386.c > 2013-12-21 21:15:57.326590210 -0800 > +++ gcc/config/i386/i386.c 2013-12-09 10:46:07.774472216 -0800 > @@ -29204,6 +29204,50 @@ > } > } > > +/* Return whether the Ith param of the BUILTIN_FUNCTION > + is a memory reference. If I == -1, return whether the > + BUILTIN_FUNCTION contains any memory reference param. */ > + > +static bool > +ix86_builtin_has_mem_ref_p (int builtin_function, int i) > +{ > + switch ((enum ix86_builtins) builtin_function) > + { > + /* LOAD. */ > + case IX86_BUILTIN_LOADHPS: > + case IX86_BUILTIN_LOADLPS: > + case IX86_BUILTIN_LOADHPD: > + case IX86_BUILTIN_LOADLPD: > + if (i == -1 || i == 1) > + return true; > + break; > + case IX86_BUILTIN_LOADUPS: > + case IX86_BUILTIN_LOADUPD: > + case IX86_BUILTIN_LOADDQU: > + case IX86_BUILTIN_LOADUPD256: > + case IX86_BUILTIN_LOADUPS256: > + case IX86_BUILTIN_LOADDQU256: > + case IX86_BUILTIN_LDDQU256: > + if (i == -1 || i == 0) > + return true; > + break; > + /* STORE. */ > + case IX86_BUILTIN_STOREHPS: > + case IX86_BUILTIN_STORELPS: > + case IX86_BUILTIN_STOREUPS: > + case IX86_BUILTIN_STOREUPD: > + case IX86_BUILTIN_STOREDQU: > + case IX86_BUILTIN_STOREUPD256: > + case IX86_BUILTIN_STOREUPS256: > + case IX86_BUILTIN_STOREDQU256: > + if (i == -1 || i == 0) > + return true; > + default: > + break; > + } > + return false; > +} > + > /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL > to return a pointer to VERSION_DECL if the outcome of the expression > formed by PREDICATE_CHAIN is true. This function will be called during > @@ -32080,7 +32124,13 @@ > if (i == memory) > { > /* This must be the memory operand. */ > - op = force_reg (Pmode, convert_to_mode (Pmode, op, 1)); > + > + /* We expect the builtin could be expanded to rtl with memory > + operand and proper addressing mode will be kept as specified > + in TARGET_MEM_REF. */ > + if (!(TREE_CODE (arg) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (arg, 0)) == TARGET_MEM_REF)) > + op = force_reg (Pmode, convert_to_mode (Pmode, op, 1)); > op = gen_rtx_MEM (mode, op); > gcc_assert (GET_MODE (op) == mode > || GET_MODE (op) == VOIDmode); > @@ -43035,6 +43085,9 @@ > #undef TARGET_ASM_FUNCTION_PROLOGUE > #define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue > > +#undef TARGET_BUILTIN_HAS_MEM_REF_P > +#define TARGET_BUILTIN_HAS_MEM_REF_P ix86_builtin_has_mem_ref_p > + > #undef TARGET_ASM_FUNCTION_EPILOGUE > #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue > > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi.in > 2013-12-21 21:08:29.623160737 -0800 > +++ gcc/doc/tm.texi.in 2013-12-09 10:45:50.124338984 -0800 > @@ -696,6 +696,8 @@ > > @hook TARGET_CHECK_STRING_OBJECT_FORMAT_ARG > > +@hook TARGET_BUILTIN_HAS_MEM_REF_P > + > @hook TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE > This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE} > but is called when the optimize level is changed via an attribute or > --- /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/doc/tm.texi > 2013-12-21 21:08:29.613160659 -0800 > +++ gcc/doc/tm.texi 2013-12-09 10:45:50.144339134 -0800 > @@ -708,6 +708,12 @@ > If a target implements string objects then this hook should should > provide a facility to check the function arguments in @var{args_list} > against the format specifiers in @var{format_arg} where the type of > @var{format_arg} is one recognized as a valid string reference type. > @end deftypefn > > +@deftypefn {Target Hook} bool TARGET_BUILTIN_HAS_MEM_REF_P (int > @var{builtin_function}, int @var{i}) > +This hook return whether the @var{i}th param of the @var{builtin_function} > +is a memory reference. If @var{i} is -1, return whether the > @var{builtin_function} > +contains any memory reference type param. > +@end deftypefn > + > @deftypefn {Target Hook} void TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE (void) > This target function is similar to the hook @code{TARGET_OPTION_OVERRIDE} > but is called when the optimize level is changed via an attribute or > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c > 2013-12-21 21:12:54.275187550 -0800 > +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c 2014-01-22 > 11:12:44.108736315 -0800 > @@ -20,5 +20,5 @@ > return s; > } > > -/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */ > +/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */ > /* { dg-final { cleanup-tree-dump "ivopts" } } */ > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c > 2013-12-21 21:12:54.305187781 -0800 > +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c 2014-01-21 > 23:01:16.220074850 -0800 > @@ -21,5 +21,5 @@ > return s; > } > > -/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */ > +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */ > /* { dg-final { cleanup-tree-dump "ivopts" } } */ > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c > 2014-01-22 14:03:29.569415050 -0800 > +++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_5.c 2014-01-22 > 11:05:43.715445996 -0800 > @@ -0,0 +1,21 @@ > +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ > +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ > + > +/* Make sure only one iv is selected after IVOPT. */ > + > +#include <x86intrin.h> > +extern __m128i arr[], d[]; > +void test (void) > +{ > + unsigned int b; > + for (b = 0; b < 1000; b += 2) { > + __m128i *p = (__m128i *)(&d[b]); > + __m128i a = _mm_load_si128(&arr[4*b+3]); > + __m128i v = _mm_loadu_si128(p); > + v = _mm_xor_si128(v, a); > + _mm_storeu_si128(p, v); > + } > +} > + > +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */ > +/* { dg-final { cleanup-tree-dump "ivopts" } } */ > --- > /usr/local/google/home/wmi/workarea/google_r205653_2/src/gcc/testsuite/gcc.dg/tree-ssa/loop-28.c > 2013-12-21 21:12:54.305187781 -0800 > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-28.c 2014-01-22 > 11:11:31.388167897 -0800 > @@ -14,7 +14,13 @@ > /* There should be 64 MEMs in the unrolled loop and one more in the > copy of the loop > for the rest of the iterations. */ > > -/* { dg-final { scan-tree-dump-times "MEM" 65 "optimized" } } */ > +/* We generate prefetch(addr) as: > + tmp = &MEM(addr); > + prefetch(tmp); > + to let ivopt understand the memory access mode better, so there is > another MEM in > + unrolled loop. */ > + > +/* { dg-final { scan-tree-dump-times "MEM" 66 "optimized" } } */ > > /* There should be no i_a = i_b assignments. */ > /* { dg-final { scan-tree-dump-times "i_.*= i_\[0-9\]*;" 0 "aprefetch" } } */