On Fri, 17 Aug 2018 18:39:00 +0200
Bernhard Reutner-Fischer <rep.dot....@gmail.com> wrote:

> On 16 August 2018 17:46:43 CEST, Julian Brown
> <jul...@codesourcery.com> wrote:
> >On Wed, 15 Aug 2018 21:56:54 +0200
> >Bernhard Reutner-Fischer <rep.dot....@gmail.com> wrote:
> >  
> >> On 15 August 2018 18:46:37 CEST, Julian Brown
> >> <jul...@codesourcery.com> wrote:  
> >> >On Mon, 13 Aug 2018 12:06:21 -0700
> >> >Cesar Philippidis <ce...@codesourcery.com> wrote:    
> >> 
> >> atttribute has more t than strictly necessary. 
> >> Don't like signed integer levels where they should be some
> >> unsigned. Also don't like single switch cases instead of if.
> >> And omitting function comments even if the hook way above is
> >> documented may be ok ish but is a bit lazy ;)  
> >
> >Here's a new version with those comments addressed. I also changed
> >the logic around a little to avoid adding decls to the vec in
> >omp_context which would never be given the gang-private attribute.
> >
> >Re-tested with offloading to NVPTX.
> >
> >OK?  
> 
> (TREE_CODE (var) == VAR_DECL
> Is nowadays known as VAR_P (decl), FWIW.

Fixed. (And also Tom's formatting nit mentioned in another email.)

> ISTM that global variables are not JIT-friendly.
> No further comments from me.

Probably true, but AFAIK nobody's trying to use the (GCC) JIT with the
PTX backend, and the backend already uses global variables for several
other purposes. Of course PTX code is JIT'ted itself by the NVidia
runtime, but I guess that's not what you were referring to!

Is this version OK? Re-tested with offloading to NVPTX.

Thanks,

Julian
commit 3335ddfa72944be5359280116e8eb4febd4ed3c7
Author: Julian Brown <jul...@codesourcery.com>
Date:   Thu Aug 9 20:27:04 2018 -0700

    [OpenACC] Add support for gang local storage allocation in shared memory
    
    2018-08-10  Julian Brown  <jul...@codesourcery.com>
    	    Chung-Lin Tang  <clt...@codesourcery.com>
    
    	gcc/
    	* config/nvptx/nvptx.c (tree-hash-traits.h): Include.
    	(gangprivate_shared_size): New global variable.
    	(gangprivate_shared_align): Likewise.
    	(gangprivate_shared_sym): Likewise.
    	(gangprivate_shared_hmap): Likewise.
    	(nvptx_option_override): Initialize gangprivate_shared_sym,
    	gangprivate_shared_align.
    	(nvptx_file_end): Output gangprivate_shared_sym.
    	(nvptx_goacc_expand_accel_var): New function.
    	(nvptx_set_current_function): New function.
    	(TARGET_SET_CURRENT_FUNCTION): Define hook.
    	(TARGET_GOACC_EXPAND_ACCEL): Likewise.
    	* doc/tm.texi (TARGET_GOACC_EXPAND_ACCEL_VAR): Document new hook.
    	* doc/tm.texi.in (TARGET_GOACC_EXPAND_ACCEL_VAR): Likewise.
    	* expr.c (expand_expr_real_1): Remap decls marked with the
    	"oacc gangprivate" attribute.
    	* omp-low.c (omp_context): Add oacc_partitioning_level and
    	oacc_addressable_var_decls fields.
    	(new_omp_context): Initialize oacc_addressable_var_decls in new
    	omp_context.
    	(delete_omp_context): Delete oacc_addressable_var_decls in old
    	omp_context.
    	(lower_oacc_head_tail): Record partitioning-level count in omp context.
    	(oacc_record_private_var_clauses, oacc_record_vars_in_bind)
    	(mark_oacc_gangprivate): New functions.
    	(lower_omp_for): Call oacc_record_private_var_clauses with "for"
    	clauses.  Call mark_oacc_gangprivate for gang-partitioned loops.
    	(lower_omp_target): Call oacc_record_private_var_clauses with "target"
    	clauses.
    	Call mark_oacc_gangprivate for offloaded target regions.
    	(lower_omp_1): Call vars_in_bind for GIMPLE_BIND within OMP regions.
    	* target.def (expand_accel_var): New hook.
    
    	libgomp/
    	* testsuite/libgomp.oacc-c-c++-common/gang-private-1.c: New test.
    	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New test.
    	* testsuite/libgomp.oacc-c/pr85465.c: New test.
    	* testsuite/libgomp.oacc-fortran/gangprivate-attrib-1.f90: New test.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 9903a27..02c2847 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -73,6 +73,7 @@
 #include "cfgloop.h"
 #include "fold-const.h"
 #include "intl.h"
+#include "tree-hash-traits.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -137,6 +138,12 @@ static unsigned worker_red_size;
 static unsigned worker_red_align;
 static GTY(()) rtx worker_red_sym;
 
+/* Shared memory block for gang-private variables.  */
+static unsigned gangprivate_shared_size;
+static unsigned gangprivate_shared_align;
+static GTY(()) rtx gangprivate_shared_sym;
+static hash_map<tree_decl_hash, unsigned int> gangprivate_shared_hmap;
+
 /* Global lock variable, needed for 128bit worker & gang reductions.  */
 static GTY(()) tree global_lock_var;
 
@@ -210,6 +217,10 @@ nvptx_option_override (void)
   SET_SYMBOL_DATA_AREA (worker_red_sym, DATA_AREA_SHARED);
   worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
 
+  gangprivate_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gangprivate_shared");
+  SET_SYMBOL_DATA_AREA (gangprivate_shared_sym, DATA_AREA_SHARED);
+  gangprivate_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
   diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
   diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
   diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
@@ -4971,6 +4982,10 @@ nvptx_file_end (void)
     write_worker_buffer (asm_out_file, worker_red_sym,
 			 worker_red_align, worker_red_size);
 
+  if (gangprivate_shared_size)
+    write_worker_buffer (asm_out_file, gangprivate_shared_sym,
+			 gangprivate_shared_align, gangprivate_shared_size);
+
   if (need_softstack_decl)
     {
       write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
@@ -5918,6 +5933,52 @@ nvptx_can_change_mode_class (machine_mode, machine_mode, reg_class_t)
   return false;
 }
 
+/* Implement TARGET_GOACC_EXPAND_ACCEL_VAR.  Place "oacc gangprivate"
+   variables in shared memory.  */
+
+static rtx
+nvptx_goacc_expand_accel_var (tree var)
+{
+  if (VAR_P (var)
+      && lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var)))
+    {
+      unsigned int offset, *poffset;
+      poffset = gangprivate_shared_hmap.get (var);
+      if (poffset)
+	offset = *poffset;
+      else
+	{
+	  unsigned HOST_WIDE_INT align = DECL_ALIGN (var);
+	  gangprivate_shared_size
+	    = (gangprivate_shared_size + align - 1) & ~(align - 1);
+	  if (gangprivate_shared_align < align)
+	    gangprivate_shared_align = align;
+
+	  offset = gangprivate_shared_size;
+	  bool existed = gangprivate_shared_hmap.put (var, offset);
+	  gcc_assert (!existed);
+	  gangprivate_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var));
+	}
+      rtx addr = plus_constant (Pmode, gangprivate_shared_sym, offset);
+      return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr);
+    }
+  return NULL_RTX;
+}
+
+static GTY(()) tree nvptx_previous_fndecl;
+
+/* Implement TARGET_SET_CURRENT_FUNCTION.  Reset per-function context.  */
+
+static void
+nvptx_set_current_function (tree fndecl)
+{
+  if (!fndecl || fndecl == nvptx_previous_fndecl)
+    return;
+
+  gangprivate_shared_hmap.empty ();
+  nvptx_previous_fndecl = fndecl;
+}
+
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
 
@@ -6054,6 +6115,12 @@ nvptx_can_change_mode_class (machine_mode, machine_mode, reg_class_t)
 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
 
+#undef TARGET_GOACC_EXPAND_ACCEL_VAR
+#define TARGET_GOACC_EXPAND_ACCEL_VAR nvptx_goacc_expand_accel_var
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index e348f0a..9164917 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6124,6 +6124,14 @@ like @code{cond_add@var{m}}.  The default implementation returns a zero
 constant of type @var{type}.
 @end deftypefn
 
+@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_ACCEL_VAR (tree @var{var})
+This hook, if defined, is used by accelerator target back-ends to expand
+specially handled kinds of VAR_DECL expressions.  A particular use is to
+place variables with specific attributes inside special accelarator
+memories.  A return value of NULL indicates that the target does not
+handle this VAR_DECL, and normal RTL expanding is resumed.
+@end deftypefn
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f1ad80d..3cdaca2 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4202,6 +4202,8 @@ address;  but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_PREFERRED_ELSE_VALUE
 
+@hook TARGET_GOACC_EXPAND_ACCEL_VAR
+
 @node Anchored Addresses
 @section Anchored Addresses
 @cindex anchored addresses
diff --git a/gcc/expr.c b/gcc/expr.c
index 85b7847..0f73deb 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9874,8 +9874,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
       exp = SSA_NAME_VAR (ssa_name);
       goto expand_decl_rtl;
 
-    case PARM_DECL:
     case VAR_DECL:
+      /* Allow accel compiler to handle specific cases of variables,
+	 specifically those tagged with the "oacc gangprivate" attribute,
+	 which may be intended to be placed in special memory in GPUs.  */
+      if (flag_openacc && targetm.goacc.expand_accel_var)
+	{
+	  temp = targetm.goacc.expand_accel_var (exp);
+	  if (temp)
+	    return temp;
+	}
+      /* ... fall through ...  */
+
+    case PARM_DECL:
       /* If a static var's type was incomplete when the decl was written,
 	 but the type is complete now, lay out the decl now.  */
       if (DECL_SIZE (exp) == 0
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b406ce7..f078110 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -133,6 +133,12 @@ struct omp_context
 
   /* True if this construct can be cancelled.  */
   bool cancellable;
+
+  /* The number of levels of OpenACC partitioning invoked in this context.  */
+  unsigned oacc_partitioning_levels;
+
+  /* Addressable variable decls in this context.  */
+  vec<tree> *oacc_addressable_var_decls;
 };
 
 static splay_tree all_contexts;
@@ -872,6 +878,7 @@ new_omp_context (gimple *stmt, omp_context *outer_ctx)
     }
 
   ctx->cb.decl_map = new hash_map<tree, tree>;
+  ctx->oacc_addressable_var_decls = new vec<tree> ();
 
   return ctx;
 }
@@ -953,6 +960,8 @@ delete_omp_context (splay_tree_value value)
       delete ctx->task_reduction_map;
     }
 
+  delete ctx->oacc_addressable_var_decls;
+
   XDELETE (ctx);
 }
 
@@ -6470,6 +6479,9 @@ lower_oacc_head_tail (location_t loc, tree clauses,
   tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
 
   gcc_assert (count);
+
+  ctx->oacc_partitioning_levels = count;
+
   for (unsigned done = 1; count; count--, done++)
     {
       gimple_seq fork_seq = NULL;
@@ -8144,6 +8156,68 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
     }
 }
 
+/* Record vars listed in private clauses in CLAUSES in CTX.  This information
+   is used to mark up variables that should be made private per-gang.  */
+
+static void
+oacc_record_private_var_clauses (omp_context *ctx, tree clauses)
+{
+  tree c;
+
+  if (!ctx)
+    return;
+
+  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
+      {
+	tree decl = OMP_CLAUSE_DECL (c);
+	if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+	  ctx->oacc_addressable_var_decls->safe_push (decl);
+      }
+}
+
+/* Record addressable vars declared in BINDVARS in CTX.  This information is
+   used to mark up variables that should be made private per-gang.  */
+
+static void
+oacc_record_vars_in_bind (omp_context *ctx, tree bindvars)
+{
+  if (!ctx)
+    return;
+
+  for (tree v = bindvars; v; v = DECL_CHAIN (v))
+    if (VAR_P (v) && TREE_ADDRESSABLE (v))
+      ctx->oacc_addressable_var_decls->safe_push (v);
+}
+
+/* Mark addressable variables which are declared implicitly or explicitly as
+   gang private with a special attribute.  These may need to have their
+   declarations altered later on in compilation (e.g. in
+   execute_oacc_device_lower or the backend, depending on how the OpenACC
+   execution model is implemented on a given target) to ensure that sharing
+   semantics are correct.  */
+
+static void
+mark_oacc_gangprivate (vec<tree> *decls)
+{
+  int i;
+  tree decl;
+
+  FOR_EACH_VEC_ELT (*decls, i, decl)
+    if (!lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (decl)))
+      {
+	if (dump_file && (dump_flags & TDF_DETAILS))
+	  {
+	    fprintf (dump_file,
+		     "Setting 'oacc gangprivate' attribute for decl:");
+	    print_generic_decl (dump_file, decl, TDF_SLIM);
+	    fputc ('\n', dump_file);
+	  }
+	DECL_ATTRIBUTES (decl)
+	  = tree_cons (get_identifier ("oacc gangprivate"),
+		       NULL, DECL_ATTRIBUTES (decl));
+      }
+}
 
 /* Lower code for an OMP loop directive.  */
 
@@ -8161,6 +8235,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   push_gimplify_context ();
 
+  oacc_record_private_var_clauses (ctx, gimple_omp_for_clauses (stmt));
+
   lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
 
   block = make_node (BLOCK);
@@ -8316,7 +8392,20 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   /* Add OpenACC partitioning and reduction markers just before the loop.  */
   if (oacc_head)
-    gimple_seq_add_seq (&body, oacc_head);
+    {
+      gimple_seq_add_seq (&body, oacc_head);
+
+      unsigned level_total = 0;
+      omp_context *thisctx;
+
+      for (thisctx = ctx; thisctx; thisctx = thisctx->outer)
+        level_total += thisctx->oacc_partitioning_levels;
+
+      /* If the current context and parent contexts are distributed over a
+	 total of one parallelism level, we have gang partitioning.  */
+      if (level_total == 1)
+        mark_oacc_gangprivate (ctx->oacc_addressable_var_decls);
+    }
 
   lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
 
@@ -9092,6 +9181,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   clauses = gimple_omp_target_clauses (stmt);
 
+  oacc_record_private_var_clauses (ctx, clauses);
+
   gimple_seq dep_ilist = NULL;
   gimple_seq dep_olist = NULL;
   if (omp_find_clause (clauses, OMP_CLAUSE_DEPEND))
@@ -9342,6 +9433,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   if (offloaded)
     {
+      mark_oacc_gangprivate (ctx->oacc_addressable_var_decls);
+
       /* Declare all the variables created by mapping and the variables
 	 declared in the scope of the target body.  */
       record_vars_into (ctx->block_vars, child_fn);
@@ -10336,6 +10429,7 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		 ctx);
       break;
     case GIMPLE_BIND:
+      oacc_record_vars_in_bind (ctx, gimple_bind_vars (as_a <gbind *> (stmt)));
       lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
       maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
       break;
diff --git a/gcc/target.def b/gcc/target.def
index 96f37e0..e154b17 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1707,6 +1707,16 @@ for allocating any storage for reductions when necessary.",
 void, (gcall *call),
 default_goacc_reduction)
 
+DEFHOOK
+(expand_accel_var,
+"This hook, if defined, is used by accelerator target back-ends to expand\n\
+specially handled kinds of VAR_DECL expressions.  A particular use is to\n\
+place variables with specific attributes inside special accelarator\n\
+memories.  A return value of NULL indicates that the target does not\n\
+handle this VAR_DECL, and normal RTL expanding is resumed.",
+rtx, (tree var),
+NULL)
+
 HOOK_VECTOR_END (goacc)
 
 /* Functions relating to vectorization.  */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c
new file mode 100644
index 0000000..f378346
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-private-1.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+int main (void)
+{
+  int ret;
+
+  #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+  {
+    int w = 0;
+
+    #pragma acc loop worker
+    for (int i = 0; i < 32; i++)
+      {
+        #pragma acc atomic update
+	w++;
+      }
+
+    ret = (w == 32);
+  }
+  assert (ret);
+
+  #pragma acc parallel num_gangs(1) vector_length(32) copyout(ret)
+  {
+    int v = 0;
+
+    #pragma acc loop vector
+    for (int i = 0; i < 32; i++)
+      {
+        #pragma acc atomic update
+	v++;
+      }
+
+    ret = (v == 32);
+  }
+  assert (ret);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
new file mode 100644
index 0000000..a4f81a3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <alloca.h>
+#include <string.h>
+#include <gomp-constants.h>
+#include <stdlib.h>
+
+#if 0
+#define DEBUG(DIM, IDX, VAL) \
+  fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL))
+#else
+#define DEBUG(DIM, IDX, VAL)
+#endif
+
+#define N (32*32*32)
+
+int
+check (const char *dim, int *dist, int dimsize)
+{
+  int ix;
+  int exit = 0;
+
+  for (ix = 0; ix < dimsize; ix++)
+    {
+      DEBUG(dim, ix, dist[ix]);
+      if (dist[ix] < (N) / (dimsize + 0.5)
+	  || dist[ix] > (N) / (dimsize - 0.5))
+	{
+	  fprintf (stderr, "did not distribute to %ss (%d not between %d "
+		   "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)),
+		   (int) ((N) / (dimsize - 0.5)));
+	  exit |= 1;
+	}
+    }
+
+  return exit;
+}
+
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int gangsize = 0, workersize = 0, vectorsize = 0;
+  int *gangdist, *workerdist, *vectordist;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+	    copy(ary) copyout(gangsize, workersize, vectorsize)
+  {
+#pragma acc loop gang worker vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int g, w, v;
+
+	g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+	ary[ix] = (g << 16) | (w << 8) | v;
+      }
+
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+  }
+
+  gangdist = (int *) alloca (gangsize * sizeof (int));
+  workerdist = (int *) alloca (workersize * sizeof (int));
+  vectordist = (int *) alloca (vectorsize * sizeof (int));
+  memset (gangdist, 0, gangsize * sizeof (int));
+  memset (workerdist, 0, workersize * sizeof (int));
+  memset (vectordist, 0, vectorsize * sizeof (int));
+
+  /* Test that work is shared approximately equally amongst each active
+     gang/worker/vector.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int g = (ary[ix] >> 16) & 255;
+      int w = (ary[ix] >> 8) & 255;
+      int v = ary[ix] & 255;
+
+      gangdist[g]++;
+      workerdist[w]++;
+      vectordist[v]++;
+    }
+
+  exit = check ("gang", gangdist, gangsize);
+  exit |= check ("worker", workerdist, workersize);
+  exit |= check ("vector", vectordist, vectorsize);
+
+  return exit;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/pr85465.c b/libgomp/testsuite/libgomp.oacc-c/pr85465.c
new file mode 100644
index 0000000..329e8a0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/pr85465.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-w" } */
+
+int
+main (void)
+{
+#pragma acc parallel
+  foo ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gangprivate-attrib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gangprivate-attrib-1.f90
new file mode 100644
index 0000000..5f8a5e6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gangprivate-attrib-1.f90
@@ -0,0 +1,25 @@
+! Test for "oacc gangprivate" attribute on gang-private variables
+
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-omplower-details" }
+! { dg-final { scan-tree-dump-times "Setting 'oacc gangprivate' attribute for decl:  integer\\(kind=4\\) w;" 1 "omplower" } } */
+
+program main
+  integer :: w, arr(0:31)
+
+  !$acc parallel num_gangs(32) num_workers(32) copyout(arr)
+    !$acc loop gang private(w)
+    do j = 0, 31
+      w = 0
+      !$acc loop seq
+      do i = 0, 31
+        !$acc atomic update
+        w = w + 1
+        !$acc end atomic
+      end do
+      arr(j) = w
+    end do
+  !$acc end parallel
+
+  if (any (arr .ne. 32)) stop 1
+end program main

Reply via email to