I've committed this patch to reduce the number of worker reduction allocation
builtins. We now pass in the (constant) allocation size and alignment and
return a void ptr.
nathan
2015-09-24 Nathan Sidwell <nat...@codesourcery.com>
* config/nvptx/nvptx.c (nvptx_expand_work_red_addr): Args 0 & 1
are size and alignment of allocation.
(nvptx_types): Delete NT_UINTPTR_UINT_UINT, NT_ULLPTR_UINT_UINT,
NT_FLTPTR_UINT_UINT, NT_DBLPTR_UINT_UINT. Add
NT_PTR_UINT_UINT_UINT_UINT.
(nvptx_builtins): Delete __builtin_nvptx_work_red_addrll,
__builtin_nvptx_work_red_addrf,
__builtin_nvptx_work_red_addrd. Adjust
__builtin_nvptx_work_red_addr type.
(nvptx_init_builtins): Adjust.
(nvptx_get_worker_red_addr_fn): Rename to ...
(nvptx_get_worker_red_addr): ... here. Use single builtin and
cast return type.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_fini,
nvptx_goacc_reduction_teardown): Adjust.
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c (revision 228094)
+++ config/nvptx/nvptx.c (working copy)
@@ -4159,18 +4159,9 @@ nvptx_expand_work_red_addr (tree exp, rt
{
if (ignore)
return target;
-
- rtx loop_id = expand_expr (CALL_EXPR_ARG (exp, 0),
- NULL_RTX, mode, EXPAND_NORMAL);
- rtx red_id = expand_expr (CALL_EXPR_ARG (exp, 1),
- NULL_RTX, mode, EXPAND_NORMAL);
- gcc_assert (GET_CODE (loop_id) == CONST_INT
- && GET_CODE (red_id) == CONST_INT);
- gcc_assert (REG_P (target));
-
- unsigned lid = (unsigned)UINTVAL (loop_id);
- unsigned rid = (unsigned)UINTVAL (red_id);
+ unsigned lid = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 2));
+ unsigned rid = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 3));
unsigned ix;
for (ix = 0; ix != loop_reds.length (); ix++)
@@ -4186,15 +4177,14 @@ nvptx_expand_work_red_addr (tree exp, rt
/* Allocate a new var. */
{
- tree type = TREE_TYPE (TREE_TYPE (exp));
- enum machine_mode mode = TYPE_MODE (type);
- unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
+ unsigned size = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 0));
+ unsigned align = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1));
unsigned off = loop.hwm;
if (align > worker_red_align)
worker_red_align = align;
off = (off + align - 1) & ~(align -1);
- loop.hwm = off + GET_MODE_SIZE (mode);
+ loop.hwm = off + size;
loop.vars.safe_push (var_red_t (rid, off));
}
found_rid:
@@ -4221,10 +4211,7 @@ enum nvptx_types
NT_ULL_ULL_INT,
NT_FLT_FLT_INT,
NT_DBL_DBL_INT,
- NT_UINTPTR_UINT_UINT,
- NT_ULLPTR_UINT_UINT,
- NT_FLTPTR_UINT_UINT,
- NT_DBLPTR_UINT_UINT,
+ NT_PTR_UINT_UINT_UINT_UINT,
NT_MAX
};
@@ -4236,9 +4223,6 @@ enum nvptx_builtins
NVPTX_BUILTIN_SHUFFLE_DOWNF,
NVPTX_BUILTIN_SHUFFLE_DOWND,
NVPTX_BUILTIN_WORK_RED_ADDR,
- NVPTX_BUILTIN_WORK_RED_ADDRLL,
- NVPTX_BUILTIN_WORK_RED_ADDRF,
- NVPTX_BUILTIN_WORK_RED_ADDRD,
NVPTX_BUILTIN_MAX
};
@@ -4252,13 +4236,7 @@ static const struct builtin_description
nvptx_expand_shuffle_down},
{"__builtin_nvptx_shuffle_downd", NT_DBL_DBL_INT,
nvptx_expand_shuffle_down},
- {"__builtin_nvptx_work_red_addr", NT_UINTPTR_UINT_UINT,
- nvptx_expand_work_red_addr},
- {"__builtin_nvptx_work_red_addrll", NT_ULLPTR_UINT_UINT,
- nvptx_expand_work_red_addr},
- {"__builtin_nvptx_work_red_addrf", NT_FLTPTR_UINT_UINT,
- nvptx_expand_work_red_addr},
- {"__builtin_nvptx_work_red_addrd", NT_DBLPTR_UINT_UINT,
+ {"__builtin_nvptx_work_red_addr", NT_PTR_UINT_UINT_UINT_UINT,
nvptx_expand_work_red_addr},
};
@@ -4294,24 +4272,9 @@ nvptx_init_builtins (void)
types[NT_DBL_DBL_INT]
= build_function_type_list (double_type_node, double_type_node,
integer_type_node, NULL_TREE);
- types[NT_UINTPTR_UINT_UINT]
- = build_function_type_list (build_pointer_type (unsigned_type_node),
+ types[NT_PTR_UINT_UINT_UINT_UINT]
+ = build_function_type_list (ptr_type_node,
unsigned_type_node, unsigned_type_node,
- NULL_TREE);
-
- types[NT_ULLPTR_UINT_UINT]
- = build_function_type_list (build_pointer_type
- (long_long_unsigned_type_node),
- unsigned_type_node, unsigned_type_node,
- NULL_TREE);
-
- types[NT_FLTPTR_UINT_UINT]
- = build_function_type_list (build_pointer_type (float_type_node),
- unsigned_type_node, unsigned_type_node,
- NULL_TREE);
-
- types[NT_DBLPTR_UINT_UINT]
- = build_function_type_list (build_pointer_type (double_type_node),
unsigned_type_node, unsigned_type_node,
NULL_TREE);
@@ -4440,37 +4403,18 @@ nvptx_xform_lock (gcall *call, const int
}
static tree
-nvptx_get_worker_red_addr_fn (tree var, tree rid, tree lid)
+nvptx_get_worker_red_addr (tree type, tree rid, tree lid)
{
- tree vartype = TREE_TYPE (var);
- tree fndecl, call;
- enum nvptx_builtins fn;
- machine_mode mode = TYPE_MODE (vartype);
-
- switch (mode)
- {
- case QImode:
- case HImode:
- case SImode:
- fn = NVPTX_BUILTIN_WORK_RED_ADDR;
- break;
- case DImode:
- fn = NVPTX_BUILTIN_WORK_RED_ADDRLL;
- break;
- case DFmode:
- fn = NVPTX_BUILTIN_WORK_RED_ADDRD;
- break;
- case SFmode:
- fn = NVPTX_BUILTIN_WORK_RED_ADDRF;
- break;
- default:
- gcc_unreachable ();
- }
-
- fndecl = nvptx_builtin_decl (fn, true);
- call = build_call_expr (fndecl, 2, lid, rid);
+ machine_mode mode = TYPE_MODE (type);
+ tree fndecl = nvptx_builtin_decl (NVPTX_BUILTIN_WORK_RED_ADDR, true);
- return call;
+ PROMOTE_MODE (mode, NULL, type);
+ tree size = build_int_cst (unsigned_type_node, GET_MODE_SIZE (mode));
+ tree align = build_int_cst (unsigned_type_node,
+ GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
+ tree call = build_call_expr (fndecl, 4, size, align, lid, rid);
+
+ return fold_build1 (NOP_EXPR, build_pointer_type (type), call);
}
/* Emit a SHFL.DOWN using index SHFL of VAR into DEST_VAR. This function
@@ -4565,12 +4509,11 @@ nvptx_goacc_reduction_setup (gcall *call
if (level == GOMP_DIM_WORKER)
{
- tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (var)));
- tree call = nvptx_get_worker_red_addr_fn (var, rid, lid);
- tree ref;
+ tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), rid, lid);
+ tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
- ref = build_simple_mem_ref (ptr);
+ tree ref = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (ref) = 1;
gimplify_assign (ref, var, &seq);
r = var;
@@ -4759,8 +4702,8 @@ nvptx_goacc_reduction_fini (gcall *call)
if (level == GOMP_DIM_WORKER)
{
- tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (var)));
- tree call = nvptx_get_worker_red_addr_fn (var, rid, lid);
+ tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), rid, lid);
+ tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
accum = build_simple_mem_ref (ptr);
@@ -4829,8 +4772,8 @@ nvptx_goacc_reduction_teardown (gcall *c
push_gimplify_context (true);
if (level == GOMP_DIM_WORKER)
{
- tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (var)));
- tree call = nvptx_get_worker_red_addr_fn (var, rid, lid);
+ tree call = nvptx_get_worker_red_addr(TREE_TYPE (var), rid, lid);
+ tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
r = build_simple_mem_ref (ptr);