This patch contains the NVPTX backend support for placing OpenACC gang-private variables in GPU shared memory.
Tested with offloading to NVPTX. This is substantially the same as the version previously posted: I will assume it is already approved (unless I hear objections), and will commit it at the same time as the rest of the series. (https://gcc.gnu.org/pipermail/gcc-patches/2018-October/507909.html) Thanks, Julian 2021-02-23 Chung-Lin Tang <clt...@codesourcery.com> Julian Brown <jul...@codesourcery.com> gcc/ * config/nvptx/nvptx.c (tree-pretty-print.h): Include. (gangprivate_shared_size): New global variable. (gangprivate_shared_align): Likewise. (gangprivate_shared_sym): Likewise. (gangprivate_shared_hmap): Likewise. (nvptx_option_override): Initialize gangprivate_shared_sym, gangprivate_shared_align. (nvptx_file_end): Output gangprivate_shared_sym. (nvptx_goacc_adjust_private_decl, nvptx_goacc_expand_var_decl): New functions. (nvptx_set_current_function): Clear gangprivate_shared_hmap. (TARGET_GOACC_ADJUST_PRIVATE_DECL): Define hook. (TARGET_GOACC_EXPAND_VAR_DECL): Likewise. --- gcc/config/nvptx/nvptx.c | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 794c5a69db0..a0474b0077b 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -75,6 +75,7 @@ #include "fold-const.h" #include "intl.h" #include "opts.h" +#include "tree-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -167,6 +168,12 @@ static unsigned vector_red_align; static unsigned vector_red_partition; static GTY(()) rtx vector_red_sym; +/* Shared memory block for gang-private variables. */ +static unsigned gangprivate_shared_size; +static unsigned gangprivate_shared_align; +static GTY(()) rtx gangprivate_shared_sym; +static hash_map<tree_decl_hash, unsigned int> gangprivate_shared_hmap; + /* Global lock variable, needed for 128bit worker & gang reductions. 
*/ static GTY(()) tree global_lock_var; @@ -251,6 +258,10 @@ nvptx_option_override (void) vector_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT; vector_red_partition = 0; + gangprivate_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gangprivate_shared"); + SET_SYMBOL_DATA_AREA (gangprivate_shared_sym, DATA_AREA_SHARED); + gangprivate_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT; + diagnose_openacc_conflict (TARGET_GOMP, "-mgomp"); diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack"); diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt"); @@ -5355,6 +5366,10 @@ nvptx_file_end (void) write_shared_buffer (asm_out_file, vector_red_sym, vector_red_align, vector_red_size); + if (gangprivate_shared_size) + write_shared_buffer (asm_out_file, gangprivate_shared_sym, + gangprivate_shared_align, gangprivate_shared_size); + if (need_softstack_decl) { write_var_marker (asm_out_file, false, true, "__nvptx_stacks"); @@ -6582,6 +6597,62 @@ nvptx_truly_noop_truncation (poly_uint64, poly_uint64) return false; } +/* Implement TARGET_GOACC_ADJUST_PRIVATE_DECL. Set "oacc gangprivate" + attribute for gang-private variable declarations. */ + +static tree +nvptx_goacc_adjust_private_decl (tree decl, int level) +{ + if (level != GOMP_DIM_GANG) + return decl; + + if (!lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (decl))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Setting 'oacc gangprivate' attribute for decl:"); + print_generic_decl (dump_file, decl, TDF_SLIM); + fputc ('\n', dump_file); + } + tree id = get_identifier ("oacc gangprivate"); + DECL_ATTRIBUTES (decl) = tree_cons (id, NULL, DECL_ATTRIBUTES (decl)); + } + + return decl; +} + +/* Implement TARGET_GOACC_EXPAND_VAR_DECL. Place "oacc gangprivate" + variables in shared memory. 
*/ + +static rtx +nvptx_goacc_expand_var_decl (tree var) +{ + if (VAR_P (var) + && lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var))) + { + unsigned int offset, *poffset; + poffset = gangprivate_shared_hmap.get (var); + if (poffset) + offset = *poffset; + else + { + unsigned HOST_WIDE_INT align = DECL_ALIGN (var); + gangprivate_shared_size + = (gangprivate_shared_size + align - 1) & ~(align - 1); + if (gangprivate_shared_align < align) + gangprivate_shared_align = align; + + offset = gangprivate_shared_size; + bool existed = gangprivate_shared_hmap.put (var, offset); + gcc_assert (!existed); + gangprivate_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var)); + } + rtx addr = plus_constant (Pmode, gangprivate_shared_sym, offset); + return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr); + } + return NULL_RTX; +} + static GTY(()) tree nvptx_previous_fndecl; static void @@ -6590,6 +6661,7 @@ nvptx_set_current_function (tree fndecl) if (!fndecl || fndecl == nvptx_previous_fndecl) return; + gangprivate_shared_hmap.empty (); nvptx_previous_fndecl = fndecl; vector_red_partition = 0; oacc_bcast_partition = 0; @@ -6754,6 +6826,12 @@ nvptx_libc_has_function (enum function_class fn_class, tree type) #undef TARGET_HAVE_SPECULATION_SAFE_VALUE #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed +#undef TARGET_GOACC_ADJUST_PRIVATE_DECL +#define TARGET_GOACC_ADJUST_PRIVATE_DECL nvptx_goacc_adjust_private_decl + +#undef TARGET_GOACC_EXPAND_VAR_DECL +#define TARGET_GOACC_EXPAND_VAR_DECL nvptx_goacc_expand_var_decl + #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function -- 2.29.2