Hi!

A few more comments on the patch, as committed in r236678, also for
Chung-Lin and Tom.

The ChangeLogs are missing references to GCC PRs, so these now should be
updated manually.  For example, your changes relate to PR70688 "bogus
OpenACC data clause errors involving reductions", and some of the
gcc/c/c-typeck.c:c_finish_omp_clauses and
gcc/cp/semantics.c:finish_omp_clauses changes (for OpenACC: "data clauses"
instead of "map clauses") and corresponding
gcc/testsuite/c-c++-common/goacc/data-clause-duplicate-1.c etc. test
suite updates relate to <http://gcc.gnu.org/PR65095> "Adapt OpenMP
diagnostic messages for OpenACC" (but that still is to remain open until
addressed in full).  Don't know if there are any other related PRs?

> --- gcc/c/c-parser.c
> +++ gcc/c/c-parser.c
> @@ -13602,6 +13602,7 @@ c_parser_oacc_declare (c_parser *parser)
>  
>        switch (OMP_CLAUSE_MAP_KIND (t))
>       {
> +     case GOMP_MAP_FIRSTPRIVATE_POINTER:
>       case GOMP_MAP_FORCE_ALLOC:
>       case GOMP_MAP_FORCE_TO:
>       case GOMP_MAP_FORCE_DEVICEPTR:
|       case GOMP_MAP_DEVICE_RESIDENT:
|         break;
| 
|       case GOMP_MAP_POINTER:
|         /* Generated by c_finish_omp_clauses from array sections;
|            avoid spurious diagnostics.  */
|         break;

Is "case GOMP_MAP_FIRSTPRIVATE_POINTER" meant to replace the "case
GOMP_MAP_POINTER"?  If yes, then please remove that one (does that become
gcc_unreachable?), and update/move the comment, or if not, please update
the comment, too.  ;-)

> --- gcc/c/c-typeck.c
> +++ gcc/c/c-typeck.c

>  /* Handle array sections for clause C.  */
>  
>  static bool
> -handle_omp_array_sections (tree c, bool is_omp)
> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>  {
>    [...]
> @@ -12427,7 +12427,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>             && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>       return false;
>        gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR);
> -      if (is_omp)
> +      if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>       switch (OMP_CLAUSE_MAP_KIND (c))
>         {
>         case GOMP_MAP_ALLOC:
|         case GOMP_MAP_TO:
|         case GOMP_MAP_FROM:
|         case GOMP_MAP_TOFROM:
|         case GOMP_MAP_ALWAYS_TO:
|         case GOMP_MAP_ALWAYS_FROM:
|         case GOMP_MAP_ALWAYS_TOFROM:
|         case GOMP_MAP_RELEASE:
|         case GOMP_MAP_DELETE:
|           OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1;
|           break;
|         default:
>           break;
>         }

Why doesn't that apply also to the other (OpenACC) map kinds?  Comparing
to the full list in include/gomp-constants.h:enum gomp_map_kind, there
are several missing here.

>        tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
> -      if (!is_omp)
> +      if (ort != C_ORT_OMP && ort != C_ORT_ACC)
>       OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>        else if (TREE_CODE (t) == COMPONENT_REF)
>       OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);

> --- gcc/cp/parser.c
> +++ gcc/cp/parser.c
> @@ -35214,6 +35214,7 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token 
> *pragma_tok)
>        gcc_assert (OMP_CLAUSE_CODE (t) == OMP_CLAUSE_MAP);
>        switch (OMP_CLAUSE_MAP_KIND (t))
>       {
> +     case GOMP_MAP_FIRSTPRIVATE_POINTER:
>       case GOMP_MAP_FORCE_ALLOC:
>       case GOMP_MAP_FORCE_TO:
>       case GOMP_MAP_FORCE_DEVICEPTR:

Likewise to my gcc/c/c-parser.c comments.

> --- gcc/cp/semantics.c
> +++ gcc/cp/semantics.c

>  /* Handle array sections for clause C.  */
>  
>  static bool
> -handle_omp_array_sections (tree c, bool is_omp)
> +handle_omp_array_sections (tree c, enum c_omp_region_type ort)
>  {
>    [...]
> @@ -4988,7 +4989,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>             || (TREE_CODE (t) == COMPONENT_REF
>                 && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE))
>           return false;
> -       if (is_omp)
> +       if (ort == C_ORT_OMP || ort == C_ORT_ACC)
>           switch (OMP_CLAUSE_MAP_KIND (c))
>             {
>             case GOMP_MAP_ALLOC:

Likewise to my gcc/c/c-typeck.c comments.

> @@ -5007,7 +5008,7 @@ handle_omp_array_sections (tree c, bool is_omp)
>             }
>         tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
>                                     OMP_CLAUSE_MAP);
> -       if (!is_omp)
> +       if ((ort & C_ORT_OMP_DECLARE_SIMD) != C_ORT_OMP && ort != C_ORT_ACC)
>           OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
>         else if (TREE_CODE (t) == COMPONENT_REF)
>           OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER);

Shouldn't that simply be "ort != C_ORT_OMP && ort != C_ORT_ACC"?

> @@ -6054,7 +6070,7 @@ finish_omp_clauses (tree clauses, enum 
> c_omp_region_type ort)
>           omp_note_field_privatization (t, OMP_CLAUSE_DECL (c));
>         else
>           t = OMP_CLAUSE_DECL (c);
> -       if (t == current_class_ptr)
> +       if (ort != C_ORT_ACC && t == current_class_ptr)
>           {
>             error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>                    " clauses");

;-) Hmm, reminds me of the unresolved task to support the C++ "this"
pointer in OpenACC...  Anyway, in GCC trunk, we're not allowing "this"
usage, I think, so I suppose this should stay as-is?  (Possibly with an
OpenACC-specific error message.)

> @@ -6681,7 +6701,7 @@ finish_omp_clauses (tree clauses, enum 
> c_omp_region_type ort)
>                    omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
>             remove = true;
>           }
> -       else if (t == current_class_ptr)
> +       else if (ort != C_ORT_ACC && t == current_class_ptr)
>           {
>             error ("%<this%> allowed in OpenMP only in %<declare simd%>"
>                    " clauses");

Likewise.

> --- gcc/gimplify.c
> +++ gcc/gimplify.c
> @@ -6280,6 +6280,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
> decl, bool in_code)
>                       error ("variable %qE declared in enclosing "
>                              "%<host_data%> region", DECL_NAME (decl));
>                     nflags |= GOVD_MAP;
> +                   if (octx->region_type == ORT_ACC_DATA
> +                       && (n2->value & GOVD_MAP_0LEN_ARRAY))
> +                     nflags |= GOVD_MAP_0LEN_ARRAY;
>                     goto found_outer;
>                   }
>               }

Later on, everyone will have a hard time understanding that logic, so
please add comments for such special handling.  Why is ORT_ACC_DATA being
handled differently from the OpenMP target data construct, for example?

> @@ -6855,9 +6858,14 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
> *pre_p,
>           {
>           case OMP_TARGET:
>             break;
> +         case OACC_DATA:
> +           if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
> +             break;

Likewise.

Also add a "/* FALLTHRU */" comment here.

>           case OMP_TARGET_DATA:
>           case OMP_TARGET_ENTER_DATA:
>           case OMP_TARGET_EXIT_DATA:
> +         case OACC_ENTER_DATA:
> +         case OACC_EXIT_DATA:
>           case OACC_HOST_DATA:
>             if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
>                 || (OMP_CLAUSE_MAP_KIND (c)
|                     == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
|               /* For target {,enter ,exit }data only the array slice is
|                  mapped, but not the pointer to it.  */
|               remove = true;
|             break;
|           default:
|             break;
|           }

By the way, why is this not relevant for the OpenACC update and OpenMP
target update directives, OACC_UPDATE and OMP_TARGET_UPDATE?  Is it
because these only update existing mappings but don't create new ones?

> @@ -7311,6 +7319,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
> *pre_p,
>                   omp_notice_variable (outer_ctx, t, true);
>               }
>           }
> +       if (code == OACC_DATA
> +           && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
> +           && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
> +         flags |= GOVD_MAP_0LEN_ARRAY;

Again, please add a comment to such special handling.

>         omp_add_variable (ctx, decl, flags);
>         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
>             && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
> @@ -7569,6 +7581,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
> *pre_p,
>         gcc_unreachable ();
>       }
>  
> +      if (code == OACC_DATA
> +       && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
> +       && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
> +     remove = true;
>        if (remove)
>       *list_p = OMP_CLAUSE_CHAIN (c);
>        else

Likewise.

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-3.c
> @@ -17,5 +17,5 @@ foo (void)
>  /* Only the omp_data_i related loads should be annotated with
>     non-base 0 cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
>  
> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-4.c
> @@ -19,5 +19,5 @@ foo (void)
>  /* Only the omp_data_i related loads should be annotated with
>     non-base 0 cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */
>  
> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-5.c
> @@ -15,5 +15,5 @@ foo (int *a)
>  
>  /* Only the omp_data_i related loads should be annotated with cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 4 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */

You once explained to me that "the new firstprivate subarray pointer
changes sometimes results in fewer data clauses".  Tom CCed for your
information, also for the following ones:

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-8.c
> @@ -7,7 +7,7 @@ extern void *acc_copyin (void *, size_t);
>  void
>  foo (int *a, size_t n)
>  {
> -  int *p = (int *)acc_copyin (&a, n);
> +  int *p = (int *)acc_copyin (a, n);

ACK (I think).

>  #pragma acc kernels deviceptr (p) pcopy(a[0:n])
>    {
> @@ -18,5 +18,5 @@ foo (int *a, size_t n)
>  
>  /* Only the omp_data_i related loads should be annotated with cliques.  */
>  /* { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 2 "ealias" } } */

Probably as above and/or related to the acc_copyin change?

> --- gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
> +++ gcc/testsuite/c-c++-common/goacc/kernels-alias-ipa-pta-3.c
> @@ -31,6 +31,5 @@ foo (void)
>    free (c);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "(?n)= \\*a" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "(?n)= 0;$" 1 "optimized" { target c } 
> } } */
> +/* { dg-final { scan-tree-dump-times "(?n)= 1;$" 1 "optimized" { target c }  
> } } */

But that one looks strange to me.  Are we still testing what we're meant
to be testing?  Why is C++ different from C?  Needs a comment, please.

> --- /dev/null
> +++ gcc/testsuite/g++.dg/goacc/data-1.C

As you're duplicating most of the content (first using C++ reference
types, and then templated), please cross-reference that file with the
original gcc/testsuite/c-c++-common/goacc/data-2.c file, and vice versa.

> @@ -0,0 +1,39 @@
> +void
> +foo (int &a, int (&b)[100], int &n)
> +{
> +#pragma acc enter data copyin (a, b) async wait
> +#pragma acc enter data create (b[20:30]) async wait
> +#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause 
> before '\\\(' token" } */
> +#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' 
> before '\\\(' token" } */
> +#pragma acc exit data delete (a) if (0)
> +#pragma acc exit data copyout (b) if (a)
> +#pragma acc exit data delete (b)
> +#pragma acc enter /* { dg-error "expected 'data' in" } */
> +#pragma acc exit /* { dg-error "expected 'data' in" } */
> +#pragma acc enter data /* { dg-error "has no data movement clause" } */
> +#pragma acc exit data /* { dg-error "has no data movement clause" } */
> +#pragma acc enter Data /* { dg-error "invalid pragma before" } */
> +#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
> +}
> +
> +template<typename T>
> +void
> +foo (T &a, T (&b)[100], T &n)
> +{
> +#pragma acc enter data copyin (a, b) async wait
> +#pragma acc enter data create (b[20:30]) async wait
> +#pragma acc enter data (a) /* { dg-error "expected '#pragma acc' clause 
> before '\\\(' token" } */
> +#pragma acc enter data create (b(1:10)) /* { dg-error "expected '\\\)' 
> before '\\\(' token" } */
> +#pragma acc exit data delete (a) if (0)
> +#pragma acc exit data copyout (b) if (a)
> +#pragma acc exit data delete (b)
> +#pragma acc enter /* { dg-error "expected 'data' in" } */
> +#pragma acc exit /* { dg-error "expected 'data' in" } */
> +#pragma acc enter data /* { dg-error "has no data movement clause" } */
> +#pragma acc exit data /* { dg-error "has no data movement clause" } */
> +#pragma acc enter Data /* { dg-error "invalid pragma before" } */
> +#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
> +}
> +
> +/* { dg-error "has no data movement clause" "" { target *-*-* } 6 } */
> +/* { dg-error "has no data movement clause" "" { target *-*-* } 25 } */

I prefer if these dg-error directives are placed in the lines following
the ones they relate to (so in line 7 and line 27 in this case).

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
> @@ -1,6 +1,4 @@
>  /* { dg-do run { target openacc_nvidia_accel_selected } } */
> -/* 
> <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
> -   { dg-xfail-run-if "TODO" { *-*-* } } */
>  /* { dg-additional-options "-lcuda" } */
>  
>  #include <openacc.h>

Chung-Lin CCed, because his "[PATCH, libgomp] Rewire OpenACC async",
<http://news.gmane.org/find-root.php?message_id=%3Cd37ca1c5-c8ed-5464-9660-7269f1460615%40codesourcery.com%3E>
is also meant to resolve this XFAIL.  Is that just a coincidence?

> copy from libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> copy to libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c
> @@ -1,12 +1,18 @@
> +/* This test is similar to data-2.c, but it uses acc_* library functions
> +   to move data.  */
> [...]

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
> @@ -1,3 +1,5 @@
> +/* Test 'acc enter/exit data' regions.  */
> +

Should also note here that its content is duplicated in data-2-lib.c.

> @@ -25,7 +27,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
> -#pragma acc parallel async wait
> +#pragma acc parallel present (a[0:N], b[0:N]) async wait
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];

I don't understand why we're adding all these "present" clauses instead
of relying on the standard/implicit "present_or_copy" behavior?
(... which is what users would be doing, I think?)  Same question applies
to data-2-lib.c, too.

> @@ -49,7 +51,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1)
> -#pragma acc parallel async (1)
> +#pragma acc parallel present (a[0:N], b[0:N])  async (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];
> @@ -76,17 +78,17 @@ main (int argc, char **argv)
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) 
> copyin (d[0:N]) copyin (N) async (1)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = (a[i] * a[i] * a[i]) / a[i];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
> @@ -120,26 +122,27 @@ main (int argc, char **argv)
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) 
> copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
>  
> -#pragma acc parallel wait (1) async (4)
> +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
> +  wait (1) async (4)
>    for (int ii = 0; ii < N; ii++)
>      e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
>  
> -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) 
> copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
> +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
> +  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
>  #pragma acc wait (1)

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c

> @@ -25,7 +27,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
> -#pragma acc parallel async wait
> +#pragma acc parallel present (a[0:N], b[0:N]) async wait
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];

Likewise ("present" clauses).

> @@ -49,7 +51,7 @@ main (int argc, char **argv)
>      }
>  
>  #pragma acc update device (a[0:N], b[0:N]) async (1)
> -#pragma acc parallel async (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = a[i];
> @@ -78,17 +80,17 @@ main (int argc, char **argv)
>  #pragma acc update device (b[0:N]) async (2)
>  #pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
>  
> -#pragma acc parallel async (1) wait (1,2)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1,2)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      b[i] = (a[i] * a[i] * a[i]) / a[i];
>  
> -#pragma acc parallel async (2) wait (1,3)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1,3)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
>  
> -#pragma acc parallel async (3) wait (1,3)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1,3)
>  #pragma acc loop
>    for (i = 0; i < N; i++)
>      d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
> @@ -123,27 +125,28 @@ main (int argc, char **argv)
>  #pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
>  #pragma acc enter data copyin (e[0:N]) async (5)
>  
> -#pragma acc parallel async (1) wait (1)
> +#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
>  
> -#pragma acc parallel async (2) wait (1)
> +#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
>  
> -#pragma acc parallel async (3) wait (1)
> +#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
>    for (int ii = 0; ii < N; ii++)
>      d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
>  
> -#pragma acc parallel wait (1,5) async (4)
> +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
> +  wait (1,5) async (4)
>    for (int ii = 0; ii < N; ii++)
>      e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
>  
> -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) 
> copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
> +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
> +  copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
>  #pragma acc exit data delete (N)
>  #pragma acc wait (1)

> --- /dev/null
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/enter_exit-lib.c
> @@ -0,0 +1,70 @@
> +/* Verify enter/exit data interoperablilty between pragmas and
> +   acc library calls.  */
> +
> +/* { dg-do run } */
> +
> +#include <stdlib.h>
> +#include <assert.h>
> +#include <openacc.h>
> +
> +int
> +main ()
> +{
> +  int *p = (int *)malloc (sizeof (int));
> +
> +  /* Test 1: pragma input, library output.  */
> +  
> +#pragma acc enter data copyin (p[0:1])
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 1;
> +  }
> +
> +  acc_copyout (p, sizeof (int));
> +
> +  assert (p[0] == 1);
> +  
> +  /* Test 2: library input, pragma output.  */
> +
> +  acc_copyin (p, sizeof (int));
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 2;
> +  }
> +
> +#pragma acc exit data copyout (p[0:1])
> +  
> +  assert (p[0] == 2);
> +
> +  /* Test 3: library input, library output.  */
> +
> +  acc_copyin (p, sizeof (int));
> +
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 3;
> +  }
> +
> +  acc_copyout (p, sizeof (int));
> +  
> +  assert (p[0] == 3);
> +
> +  /* Test 4: pragma input, pragma output.  */
> +
> +#pragma acc enter data copyin (p[0:1])
> +  
> +#pragma acc parallel present (p[0:1]) num_gangs (1)
> +  {
> +    p[0] = 3;

Meant to use "4" here?

> +  }
> +
> +#pragma acc exit data copyout (p[0:1])
> +  
> +  assert (p[0] == 3);
> +  
> +  free (p);
> +
> +  return 0;
> +}

> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
> @@ -1,4 +1,6 @@
> -/* { dg-do run } */
> +/* Check acc_is_present and acc_delete.  */

Thanks for all these summary comments that you've added!


Grüße
 Thomas

Attachment: signature.asc
Description: PGP signature

Reply via email to