[OG12][committed] OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible.

2022-12-13 Thread Marcel Vollweiler

This patch handles Unified Shared Memory (USM) in the OpenMP runtime routine
omp_target_is_accessible implementation.

A previous patch was submitted some months ago
(https://gcc.gnu.org/pipermail/gcc-patches/2022-May/594187.html) but not yet
reviewed due to dependencies on the Unified Shared Memory implementation.
Although USM is not yet in mainline, the corresponding patches were already
committed to OG12. I rebased, updated, and committed my patch to OG12
(devel/omp/gcc-12 branch).

I tested the patch with nvptx offloading (x86_64-linux and PowerPC) without
regressions. Since USM is not supported for all gcn targets, I tested gcn with
offloading for x86_64-linux on various targets (gfx90a, gfx908, gfx906, gfx803)
- also without regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 9044b7efb3518de180a5b3168615b7e12d93eea8
Author: Marcel Vollweiler 
Date:   Tue Dec 13 12:04:48 2022 +

OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible

This patch handles Unified Shared Memory (USM) in the OpenMP runtime routine
omp_target_is_accessible.

libgomp/ChangeLog:

* target.c (omp_target_is_accessible): Handle unified shared memory.
* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Updated.
* testsuite/libgomp.fortran/target-is-accessible-1.f90: Updated.
* testsuite/libgomp.c-c++-common/target-is-accessible-2.c: New test.
* testsuite/libgomp.fortran/target-is-accessible-2.f90: New test.

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 32bcc84..a0d0271 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2022-12-13  Marcel Vollweiler  
+
+   * target.c (omp_target_is_accessible): Handle unified shared memory.
+   * testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Updated.
+   * testsuite/libgomp.fortran/target-is-accessible-1.f90: Updated.
+   * testsuite/libgomp.c-c++-common/target-is-accessible-2.c: New test.
+   * testsuite/libgomp.fortran/target-is-accessible-2.f90: New test.
+
 2022-12-12  Tobias Burnus  
 
Backported from master:
diff --git a/libgomp/target.c b/libgomp/target.c
index 50709f0..2cd8e2a 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5067,9 +5067,13 @@ omp_target_is_accessible (const void *ptr, size_t size, 
int device_num)
   if (devicep == NULL)
 return false;
 
-  /* TODO: Unified shared memory must be handled when available.  */
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+return true;
 
-  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
+  if (devicep->is_usm_ptr_func && devicep->is_usm_ptr_func ((void *) ptr))
+return true;
+
+  return false;
 }
 
 int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
index 2e75c63..e7f9cf2 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
@@ -1,3 +1,5 @@
+/* { dg-do run } */
+
 #include 
 
 int
@@ -6,7 +8,8 @@ main ()
   int d = omp_get_default_device ();
   int id = omp_get_initial_device ();
   int n = omp_get_num_devices ();
-  void *p;
+  int i = 42;
+  void *p = 
 
   if (d < 0 || d >= n)
 d = id;
@@ -26,23 +29,28 @@ main ()
   if (omp_target_is_accessible (p, sizeof (int), n + 1))
 __builtin_abort ();
 
-  /* Currently, a host pointer is accessible if the device supports shared
- memory or omp_target_is_accessible is executed on the host. This
- test case must be adapted when unified shared memory is avialable.  */
   int a[128];
   for (int d = 0; d <= omp_get_num_devices (); d++)
 {
+  /* SHARED_MEM is 1 if and only if host and device share the same memory.
+OMP_TARGET_IS_ACCESSIBLE should not return 0 for shared memory.  */
   int shared_mem = 0;
   #pragma omp target map (alloc: shared_mem) device (d)
shared_mem = 1;
-  if (omp_target_is_accessible (p, sizeof (int), d) != shared_mem)
+
+  if (shared_mem && !omp_target_is_accessible (p, sizeof (int), d))
+   __builtin_abort ();
+
+  /* USM is disabled by default.  Hence OMP_TARGET_IS_ACCESSIBLE should
+return 0 if shared_mem is false.  */
+  if (!shared_mem && omp_target_is_accessible (p, sizeof (int), d))
__builtin_abort ();
 
-  if (omp_target_is_accessible (a, 128 * sizeof (int), d) != shared_mem)
+  if (shared_mem && !omp_target_is_accessible (a, 128 * sizeof (int), d))
__builtin_abort ();
 
   for (int i = 0; i < 128; i++)
-   if (omp_target_is_accessi

Re: [Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices

2022-11-24 Thread Marcel Vollweiler

Hi Jakub,


> * testsuite/libgomp.c-c++-common/icv-4.c: Bugfix.

Better say what exactly you changed in words.


Changed.


> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -14153,7 +14153,7 @@ optimize_target_teams (tree target, gimple_seq
*pre_p)
>struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
>
>if (teams == NULL_TREE)
> -num_teams_upper = integer_one_node;
> +num_teams_upper = build_int_cst (integer_type_node, -2);
>else
>  for (c = OMP_TEAMS_CLAUSES (teams); c; c = OMP_CLAUSE_CHAIN (c))
>{

The function comment above optimize_target_teams contains detailed description
on what the values mean and why, so it definitely should document what -2 means
and when it is used.
I know you have documentation in libgomp for it, but it should be in both 
places.


I updated the comment with an explanation for "-2".



> +  intptr_t new_teams = orig_teams, new_threads = orig_threads;
> +  /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.

Two spaces after .


Corrected here and at other places.



> + ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but
the
> +  value could not be specified. No Change.

Likewise.
lowercase change ?


Corrected.



> + ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
> + Set device-specific value.
> + ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
> +No change.  */
> +  if (orig_teams == -2)
> +new_teams = 1;
> +  else if (orig_teams == 0)
> +{
> +  struct gomp_offload_icv_list *item = gomp_get_offload_icv_item 
(device);
> +  if (item != NULL)
> +   new_teams = item->icvs.nteams;
> +}
> +  /* The device-specific teams-thread-limit is only set if (a) an explicit 
TEAMS
> + region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already 
set by
> + e.g. a THREAD_LIMIT clause.  */
> +  if (orig_teams >= -2 && orig_threads == 0)

The comment talks about ORIG_TEAMS > -2, but the condition is >= -2.
So which one is it?


Thanks for the hint. It should be indeed "> -2" since teams_thread_limit "sets
the maximum number of OpenMP threads to use in each contention group created by
a teams construct" (OpenMP 5.2, section 21.6.2). So if there is no (explicit)
teams construct, then teams_thread_limit doesn't need to be copied to the 
device.



> +  /* This tests a large number of teams and threads. If it is larger than
> +2^15+1 then the according argument in the kernels arguments list
> +is encoded with two items instead of one. On NVIDIA there is an
> +adjustment for too large teams and threads. For AMD such adjustment
> +exists only for threads and will cause runtime errors with a two
> +large

s/two/too/ ?
Shouldn't amdgcn adjusts also number of teams?


I adjusted now also the number of teams in the amdgcn plugin. As upper bound I
chose two times the number of compute units. This seems to be sufficient when
one team is executed at one compute unit. This at least avoids the queueing of a
large amount of teams and the corresponding memory allocation.

The drawback is that a user is probably not aware of the actual number of
compute units (which is not very large on gfx cards, e.g. 120 for gfx908 and 104
for gfx90a) and thus maybe expects different values from omp_get_team_num(). For
instance in something like the following:

#pragma omp target
#pragma omp teams num_teams(220)
#pragma omp distribute parallel for
  for(int i = 0; i < 220; ++i)
{
#pragma omp critical
   ... omp_get_team_num () ...
}

On a gfx90a card with 104 compute units 12 threads are assigned to "reused"
teams (instead of having their own teams) that would not be the case without the
limit.

Alternatively, we could just define some (larger) constant number (though I
don't know a reasonable value here). But this does actually not solve the above
mentioned drawback. I think, we need to find a compromise between an
unneccessary small upper bound and the chance to get memory allocation failures
due to a too large number of teams.



As for testcases, have you tested this in a native setup where 
dg-set-target-env-var
actually works?


Besides remote testing with offloading (which does not yet work with
dg-set-target-env-var), I also tested locally on x86_64-pc-linux-gnu with one
nvptx offload device without issues (using "make check" and verifying that
offloading is indeed used).

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of
device-specific ICV values (specified as environment variables or changed on a
device). In order 

Re: [Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices

2022-09-18 Thread Marcel Vollweiler

Hi Jakub,

The last version of this patch was slightly adapted to the latest changes of the
device-specific environment variable syntax
(https://gcc.gnu.org/pipermail/gcc-patches/2022-September/601145.html), also
considering the latest related bug fixes (commits 994ea892bd02d and 
7d37c7f67c1bb).

The new patch version was bootstrapped and tested on x86_64-linux with nvptx and
amdgcn offloading without regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of
device-specific ICV values (specified as environment variables or changed on a
device). In order to reuse device-specific ICV values, a copy back mechanism is
implemented that copies ICV values back from device to the host. 

gcc/ChangeLog:

* gimplify.cc (optimize_target_teams): Set initial num_teams_upper
to "-2" instead of "1" for non-existing num_teams clause in order to
disambiguate from the case of an existing num_teams clause with value 1.

libgomp/ChangeLog:

* config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to
allow processing of device-specific values.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* icv-device.c (omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
(omp_set_teams_thread_limit): Likewise.
* icv.c (omp_set_teams_thread_limit): Removed.
(omp_get_teams_thread_limit): Likewise.
(ialias): Likewise.
* target.c (get_gomp_offload_icvs): Added teams_thread_limit_var
handling.
(gomp_load_image_to_device): Added a size check for the ICVs struct
variable.
(gomp_copy_back_icvs): New function that is used in GOMP_target_ext to
copy back the ICV values from device to host.
(GOMP_target_ext): Update the number of teams and threads in the kernel
args also considering device-specific values.
* testsuite/libgomp.c-c++-common/icv-4.c: Bugfix.
* testsuite/libgomp.c-c++-common/icv-5.c: Extended.
* testsuite/libgomp.c-c++-common/icv-6.c: Extended.
* testsuite/libgomp.c-c++-common/icv-7.c: Extended.
* testsuite/libgomp.c-c++-common/icv-9.c: New test.
* testsuite/libgomp.fortran/icv-5.f90: New test.
* testsuite/libgomp.fortran/icv-6.f90: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/target-teams-1.c: Adapt expected values for
num_teams from "1" to "-2" in cases without num_teams clause.
* g++.dg/gomp/target-teams-1.C: Likewise.
* gfortran.dg/gomp/defaultmap-4.f90: Likewise.
* gfortran.dg/gomp/defaultmap-5.f90: Likewise.
* gfortran.dg/gomp/defaultmap-6.f90: Likewise.

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index dcdc852..b393ed8 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -14153,7 +14153,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
   struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;
 
   if (teams == NULL_TREE)
-num_teams_upper = integer_one_node;
+num_teams_upper = build_int_cst (integer_type_node, -2);
   else
 for (c = OMP_TEAMS_CLAUSES (teams); c; c = OMP_CLAUSE_CHAIN (c))
   {
diff --git a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c 
b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
index 51b8d48..74d60e1 100644
--- a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
+++ b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c
@@ -81,5 +81,5 @@ foo (int a, int b, long c, long d)
 /* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */
diff --git a/gcc/testsuite/g++.dg/gomp/target-teams-1.C 
b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
index f78a608..29e5597 100644
--- a/gcc/testsuite/g++.dg/gomp/target-teams-1.C
+++ b/gcc/testsuite/g++.dg/gomp/target-teams-1.C
@@ -88,5 +88,5 @@ foo (int a, int b, long c, long d)
 /* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */
-/* { dg-final { 

Re: [PATCH] OpenMP, libgomp: Environment variable syntax extension.

2022-08-31 Thread Marcel Vollweiler

Hi Jakub,

Am 22.08.2022 um 17:35 schrieb Jakub Jelinek:

+/* Default values of ICVs according to the OpenMP standard.  */
+struct gomp_default_icv_t gomp_default_icv_values = {
+  .run_sched_var = GFS_DYNAMIC,
+  .run_sched_chunk_size = 1,
+  .max_active_levels_var = 1,
+  .bind_var = omp_proc_bind_false,
+  .nteams_var = 0,
+  .teams_thread_limit_var = 0,
+  .default_device_var = 0
+};


Why this var (and if it is really needed, why it isn't const)?
You seem to be using only 2 fields from it:
libgomp/libgomp.h:extern struct gomp_default_icv_t gomp_default_icv_values;
libgomp/env.c:struct gomp_default_icv_t gomp_default_icv_values = {
libgomp/target.c:new->icvs.nteams = gomp_default_icv_values.nteams_var;
libgomp/target.c:new->icvs.default_device = 
gomp_default_icv_values.default_device_var;


gomp_default_icv_values is used to store the default values of the ICVs as
defined in the OpenMP standard. Previously this was not necessary since there
were only host-related ICVs being initialized with the corresponding default
values in gomp_global_icv.

gomp_global_icv cannot be used to get the default values in general as they are
overwritten as soon as we parse an environment variable for a host-related ICV.
In contrast we need the default values not only at the very beginning when we
parse the environment variables, but also when we create device-specific ICV
structs. The point in time when a device-specific ICV struct is created can be
when a particular device is used first (as we don't know the device numbers
during parsing the environment variables).

I introduced gomp_default_icv_values in order to have the default ICV values
centralized (like in gomp_global_icv before) for a better readability and
maintanance.

As you stated correctly below, there were still multiple places using the
defaults directly. I modified that.




+
  bool gomp_cancel_var = false;
  enum gomp_target_offload_t gomp_target_offload_var
= GOMP_TARGET_OFFLOAD_DEFAULT;
@@ -104,86 +123,94 @@ int goacc_default_dims[GOMP_DIM_MAX];
  static int wait_policy;
  static unsigned long stacksize = GOMP_DEFAULT_STACKSIZE;

-/* Parse the OMP_SCHEDULE environment variable.  */
-
  static void
-parse_schedule (void)
+print_env_var_error (const char *env, const char *val)
  {
-  char *env, *end;
+  char name[val - env];
+  memcpy (name, env, val - env - 1);
+  name[val - env - 1] = '\0';
+  gomp_error ("Invalid value for environment variable %s: %s", name, val);


Why the temporary buffer (especially VLA)?
Just
   gomp_error ("Invalid value for environment variable %.*s: %s",
(int) (val - env - 1), env, val);
should do the job.


Good hint, thanks! (changed)




+/* Parse the OMP_SCHEDULE environment variable.  */
+static bool
+parse_schedule (const char *env, const char *val, void * const params[])


No space after *


Changed.




+#define ENTRY(NAME) NAME, sizeof (NAME) - 1
+static const struct envvar
+{
+  const char *name;
+  int name_len;
+  uint8_t flag_vars[3];
+  uint8_t flag;
+  bool (*parse_func) (const char *, const char *, void * const[]);
+} envvars[] = {
+  { ENTRY ("OMP_SCHEDULE"),
+{ GOMP_ICV_SCHEDULE, GOMP_ICV_SCHEDULE_CHUNK_SIZE },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_schedule },
+  { ENTRY ("OMP_NUM_TEAMS"),
+{ GOMP_ICV_NTEAMS },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_int },
+  { ENTRY ("OMP_DYNAMIC"),
+{ GOMP_ICV_DYNAMIC },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_boolean },
+  { ENTRY ("OMP_TEAMS_THREAD_LIMIT"),
+{ GOMP_ICV_TEAMS_THREAD_LIMIT },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_int },
+  { ENTRY ("OMP_THREAD_LIMIT"),
+{ GOMP_ICV_THREAD_LIMIT },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_unsigned_long },
+  { ENTRY ("OMP_NUM_THREADS"),
+{ GOMP_ICV_NTHREADS, GOMP_ICV_NTHREADS_LIST, GOMP_ICV_NTHREADS_LIST_LEN },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_unsigned_long_list },
+  { ENTRY ("OMP_PROC_BIND"),
+{ GOMP_ICV_BIND, GOMP_ICV_BIND_LIST, GOMP_ICV_BIND_LIST_LEN },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_bind_var },
+  { ENTRY ("OMP_MAX_ACTIVE_LEVELS"),
+{ GOMP_ICV_MAX_ACTIVE_LEVELS },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_unsigned_long },
+  { ENTRY ("OMP_WAIT_POLICY"),
+{ GOMP_ICV_WAIT_POLICY },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_wait_policy },
+  { ENTRY ("OMP_STACKSIZE"),
+{ GOMP_ICV_STACKSIZE },
+GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X,
+_stacksize },
+  { ENTRY ("OMP_CANCELLATION"), { GOMP_ICV_CANCELLATION }, 0, _boolean },
+  { ENTRY ("OMP_DISPLAY_AFFINITY"), { GOMP_ICV_DISPLAY_AFFINITY }, 0,
+_boolean },
+  { ENTRY ("OMP_TARGET_OFFLOAD"), { GOMP_ICV_TARGET_OFFLOAD }, 0,

Re: [Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices

2022-08-03 Thread Marcel Vollweiler

Hi Jakub,

This patch was reduced a bit and most of your comments were considered in the
last submission of the environment variable syntax extension patch
(https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html). This patch
also builds on that envvar patch version.

The nteams-var related content was moved from this patch to the envvar patch as
that is closely connected. However, additional testing and testing of copy back
device-specific nteams-var ICV values is still included in this patch together
with the teams-thread-limit-var content.


--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p)
struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp;

if (teams == NULL_TREE)
-num_teams_upper = integer_one_node;
+num_teams_upper = integer_minus_two_node;


No, please don't introduce this, it is quite costly to have a GC trees
like integer_one_node, so they should stay for the most commonly used
numbers, -2 isn't like that.  Just build_int_cst (integer_type_node, -2).


integer_minus_two_node was replaced by "build_int_cst (integer_type_node, -2)".




--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -642,6 +642,7 @@ enum tree_index {
TI_INTEGER_ONE,
TI_INTEGER_THREE,
TI_INTEGER_MINUS_ONE,
+  TI_INTEGER_MINUS_TWO,
TI_NULL_POINTER,

TI_SIZE_ZERO,
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 8f83ea1..8cb474d 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char)
integer_one_node = build_int_cst (integer_type_node, 1);
integer_three_node = build_int_cst (integer_type_node, 3);
integer_minus_one_node = build_int_cst (integer_type_node, -1);
+  integer_minus_two_node = build_int_cst (integer_type_node, -2);

size_zero_node = size_int (0);
size_one_node = size_int (1);
diff --git a/gcc/tree.h b/gcc/tree.h
index cea49a5..1aeb009 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp)
  #define integer_one_node   global_trees[TI_INTEGER_ONE]
  #define integer_three_node  global_trees[TI_INTEGER_THREE]
  #define integer_minus_one_node global_trees[TI_INTEGER_MINUS_ONE]
+#define integer_minus_two_node  global_trees[TI_INTEGER_MINUS_TWO]
  #define size_zero_node global_trees[TI_SIZE_ZERO]
  #define size_one_node  global_trees[TI_SIZE_ONE]
  #define bitsize_zero_node  global_trees[TI_BITSIZE_ZERO]


And drop the above 3 hunks.


Removed.




--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR;
  volatile int GOMP_MAX_ACTIVE_LEVELS_VAR;
  volatile omp_proc_bind_t GOMP_BIND_VAR;
  volatile int GOMP_NTEAMS_VAR;
+volatile int GOMP_TEAMS_THREAD_LIMIT_VAR;


I really don't like this copying of individual ICVs one by one to the
device, copy a struct containing them and access fields in that struct.


I recently changed this in
https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html. So there is
one struct containing all ICVs that are copied from host to the device and back.




--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -116,6 +116,7 @@ struct addr_pair
  #define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels
  #define GOMP_BIND_VAR __gomp_bind
  #define GOMP_NTEAMS_VAR __gomp_nteams
+#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var


Likewise here.


Those were all removed.




@@ -527,13 +538,19 @@ struct gomp_icv_list {

  extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list,
  int device_num);
-extern struct gomp_icv_list *gomp_run_sched_var_dev_list;
-extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list;
+extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num,
+   size_t size,
+struct gomp_icv_list 
**list);
+extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list;
+extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list;
+extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list;
+extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list;
+
  extern struct gomp_icv_list *gomp_nteams_var_dev_list;
-extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list;
-extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list;
+extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list;


Nor these 

Re: [PATCH] OpenMP, libgomp: Environment variable syntax extension.

2022-07-25 Thread Marcel Vollweiler

Hi Jakub,


I'm not sure we can rely on execv on all targets that do support libgomp.
Any reason why you actually need this, rather than using
dg-set-target-env-var directive(s) and perhaps return 0; if getenv doesn't
return the expected values?


Interesting topic. After some (internal) discussions I think the best way is to
set the environment variables explicitely instead using dg-set-target-env-var.
The reason is that dg-set-target-env-var does not work for remote testing (which
seems to be a common test environment). For remote testing dejagnu immediately
aborts the test case with UNSUPPORTED which is specified in the corresponding
extension and makes sence from my point of view as the test assumption cannot be
fulfilled (since the environment variables are not set on remote targets).
It also means that whenever dg-set-target-env-var is set in the test file, the
execution of the test case is not tested on remote targets.


The only reason why dg-set-target-env-var is supported on native only right
now is that I'm never doing remote testing myself and so couldn't test that.
There is no inherent reason why the env vars couldn't be propagated over to
the remote and set in the environment there.
So trying to work around that rather than at least trying to change
dg-set-target-env-var so that it works with the remote testing you do looks
wrong.
If dg-set-target-env-var can be made to work remotely, it will magically
improve those 130+ tests that use it already together with the newly added
tests.

So, I'd suggest to just use dg-set-target-env-var and incrementally work on
making it work for remote testing if that is important to whomever does
that kind of testing.  Could be e.g. a matter of invoking remotely
env VAR1=val1 VAR2=val2 program args
instead of program args.  If env is missing on the remote side, it could
be UNSUPPORTED then.


I agree. So I changed the tests using dg-set-target-env-var and removed the
execv parts.




+/* The initial ICV values for the host, which are configured with environment
+   variables without a suffix, e.g. OMP_NUM_TEAMS.  */
+struct gomp_initial_icvs gomp_initial_icvs_none;
+
+/* Initial ICV values that were configured for the host and for all devices by
+   using environment variables like OMP_NUM_TEAMS_ALL.  */
+struct gomp_initial_icvs gomp_initial_icvs_all;
+
+/* Initial ICV values that were configured only for devices (not for the host)
+   by using environment variables like OMP_NUM_TEAMS_DEV.  */
+struct gomp_initial_icvs gomp_initial_icvs_dev;


As I said last time, I don't like allocating these
all the time in the data section of libgomp when at least for a few upcoming
years, most users will never use those suffixes.
Can't *_DEV and *_ALL go into the gomp_initial_icv_dev_list
chain too, perhaps


gomp_initial_icvs_{none, all, dev} are now defined as pointers (as you proposed
previously). gomp_initial_icvs_{all, dev} are only instantiated if at least one
according environment variable is parsed. gomp_initial_icvs_none is always
initialized with the initial global ICV values.

All three structures are now also included in gomp_initial_icv_list (previously
named gomp_initial_icv_dev_list) with "magic device numbers" -1, -2, and -3.
The list items for _DEV, _ALL and no suffix are stored at the beginning of the
list whereas the device-specific list items are attached at the end.




+static const struct envvar
+{
+  const char *name;
+  int name_len;
+  unsigned char flag_vars[3];
+  unsigned char flag;
+  void *params[3];
+  bool (*parse_func) (const char *, const char *, void * const[]);
+} envvars[] = {
+  {ENTRY ("OMP_SCHEDULE_DEV"), {OMP_SCHEDULE_DEV_, OMP_SCHEDULE_CHUNK_SIZE_DEV_}, 
GOMP_ENV_VAR_SUFFIX_DEV, {_initial_icvs_dev.run_sched_var, 
_initial_icvs_dev.run_sched_chunk_size}, _schedule},
+  {ENTRY ("OMP_SCHEDULE_ALL"), {OMP_SCHEDULE_DEV_, OMP_SCHEDULE_CHUNK_SIZE_DEV_}, 
GOMP_ENV_VAR_SUFFIX_ALL, {_initial_icvs_all.run_sched_var, 
_initial_icvs_all.run_sched_chunk_size}, _schedule},
+  {ENTRY ("OMP_SCHEDULE"), {OMP_SCHEDULE_DEV_, OMP_SCHEDULE_CHUNK_SIZE_DEV_}, 
GOMP_ENV_VAR_SUFFIX_NONE, {_initial_icvs_none.run_sched_var, 
_initial_icvs_none.run_sched_chunk_size}, _schedule},
+
+  {ENTRY ("OMP_NUM_TEAMS_DEV"), {OMP_NUM_TEAMS_DEV_}, GOMP_ENV_VAR_SUFFIX_DEV , 
{_initial_icvs_dev.nteams_var, false}, _int},
+  {ENTRY ("OMP_NUM_TEAMS_ALL"), {OMP_NUM_TEAMS_DEV_}, GOMP_ENV_VAR_SUFFIX_ALL, 
{_initial_icvs_all.nteams_var, false}, _int},
+  {ENTRY ("OMP_NUM_TEAMS"), {OMP_NUM_TEAMS_DEV_}, GOMP_ENV_VAR_SUFFIX_NONE, 
{_initial_icvs_none.nteams_var, false}, _int},
+
+  {ENTRY ("OMP_DYNAMIC_DEV"), {OMP_DYNAMIC_DEV_}, GOMP_ENV_VAR_SUFFIX_DEV, 
{_initial_icvs_dev.dyn_var}, _boolean},
+  {ENTRY ("OMP_DYNAMIC_ALL"), {OMP_DYNAMIC_DEV_}, GOMP_ENV_VAR_SUFFIX_ALL, 
{_initial_icvs_all.dyn_var}, _boolean},
+  {ENTRY ("OMP_DYNAMIC"), {OMP_DYNAMIC_DEV_}, GOMP_ENV_VAR_SUFFIX_NONE, 
{_initial_icvs_none.dyn_var}, _boolean},
+
+  {ENTRY ("OMP_TEAMS_THREAD_LIMIT_DEV"), 

[PING][Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices

2022-06-09 Thread Marcel Vollweiler

Hi,

I’d like to ping the patch for the OpenMP runtime routines omp_get_max_teams,
omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices:

https://gcc.gnu.org/pipermail/gcc-patches/2022-April/593260.html

This patch builds on the following patch which is currently in revision/review:
- [PATCH] OpenMP, libgomp: Environment variable syntax extension.
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588728.html

As several technical details will be changed anyway due to revision of the
environment variable extension patch, a complete review does not make sense yet
from my point of view. However, I wondered if a "rough" review about the main
approach/idea is feasible, so that necessary changes could be included in the
revision that is needed anyway.

Thanks
Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] OpenMP, libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async

2022-05-19 Thread Marcel Vollweiler

Hi Jakub,

Am 17.05.2022 um 20:08 schrieb Jakub Jelinek:

On Tue, May 17, 2022 at 11:57:02AM +0200, Marcel Vollweiler wrote:

More importantly, I have no idea how this can work when you pass arg_size 0
and arg_align 0.  The s variable is in the current function frame, with
arg_size 0 nothing is really copied to the generated task.
arg_size should be sizeof (memcpy_t) and arg_align __alignof__ (memcpy_t)
(well, struct omp_target_memcpy_data).


The copy function of GOMP_task ("cpyfn") is not used here (set to NULL) and thus
also arg_size and arg_align are set to 0 since they are related to cpyfn if I
understand it correctly.


No, arg_size and arg_align are for all (explicit) tasks the size and
alignment of the arguments.  For an included task (one executed by the
encountering thread) we indeed use data directly instead of allocating
arg_size arg_align aligned bytes and copying data to it.  But when we create
a deferred task (that is the only thing that actually can be asynchronous), we
allocate struct gomp_task together with memory for the data (arg_size bytes
aligned to arg_align).  If cpyfn, we invoke that copy function (from source
data to the destination buffer), otherwise memcpy.  cpyfn is a callback that
will do memcpy for parts that need bitwise copy and copy construction /
whatever else is needed for other data.
Looking at your patch, you call GOMP_task always with if_clause = false,
that means it is always included task (like with #pragma omp task if(0)),
but that also means calling GOMP_task doesn't bring any advantages and it is
not asynchronous.
If you called it with if_clause = true, like what #pragma omp task would do,
then the arg_size = 0 and arg_align = 0 would make it not work at all,
so after fixing if_clause, you need to supply sizeof (s) and __alignof__ (s).


Good explanation, thanks. Changed accordingly.




Also, it would be nice to avoid GOMP_task for the depobj_count == 0 case
at least sometimes (but perhaps that can be done incrementally) and instead
use some CUDA etc. asynchronous copy APIs.  We don't really need to wait
for anything in that case, and from OpenMP POV all we need to make sure is
that barrier/taskwait/taskgroup end will know about these "tasks" and
wait for them.  So, it can be implemented more like #pragma omp target nowait
instead of #pragma omp task that calls the synchronous omp_target_memcpy.
Though, maybe that is how it should be implemented always, something like
gomp_create_target_task and its caller.  We already use that single routine
for multiple purposes (target nowait as well as target enter/exit data
nowait), so just telling it somehow that it shouldn't do mapping/unmapping
and perhaps target execution and instead copying would be nice.


I dont't see/understand the advantage using gomp_create_target_task over
GOMP_task. Whether the task waits for dependencies
("gomp_task_maybe_wait_for_dependencies") depends on GOMP_TASK_FLAG_DEPEND which
is only set if depobj_count > 0 and depobj_list != NULL. Thus, there shouldn't
be any waiting in case of depobj_count == 0? Additionally, in both functions a
new thread is created - independently of dependencies.


GOMP_task never creates a new thread.
gomp_create_target_task can create (but just once) an unshackeled thread
that runs on the side, doesn't do normal OpenMP user work and just polls the
offloading device and performs unmapping or whatever is needed to finish a
nowait offloaded task.

The disadvantage of GOMP_task is:
1) if you call say omp_target_memcpy_async from outside of parallel, it will
not be actually asynchronous even if you call GOMP_task with if_clause = 
true
2) if you call it from inside of parallel, it might be scheduled only when
some host thread is ready for work (e.g. when reaching #pragma omp barrier,
implicit barrier, #pragma omp taskwait etc.), so even when the offloading
device is unused but host has lots of work to do, it might take quite a
while before starting the work, and then one of the OpenMP host threads
will be blocked waiting for the copying to be done

gomp_create_target_task doesn't have these disadvantages, it can fire off the
copying right away and then just needs to be able to figure out when it
finished (either the unshackeled thread polls the device, or some other way
how to find out that it finished; but OpenMP certainly needs to know that,
because user code can say #pragma omp taskwait for it, or it should be
complete at the end of a taskgroup, or at the end of #pragma omp barrier
or implicit barrier etc.).

Anyway, I guess it is ok to use GOMP_task in the initial patch and change it
later, but if_clause = false and 0, 0 for arg_{size,align} are definitely
wrong.


Agreed. Thanks for the details.




+int
+omp_target_memcpy (void *dst, const void *src, size_t length, size_t 
dst_offset,
+   size_t src_offset, int dst_device_num, int src_device_num)
+{
+  struct gomp_device_descr *dst_devicep = 

Re: [PATCH] OpenMP, libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async

2022-05-17 Thread Marcel Vollweiler

Hi Jakub,


--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -224,6 +224,8 @@ OMP_5.1 {
 omp_set_teams_thread_limit_8_;
 omp_get_teams_thread_limit;
 omp_get_teams_thread_limit_;
+omp_target_memcpy_async;
+omp_target_memcpy_rect_async;
  } OMP_5.0.2;


These should be added to OMP_5.1.1, not here.


Changed.


--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -272,6 +272,10 @@ extern int omp_target_is_present (const void *, int) 
__GOMP_NOTHROW;
  extern int omp_target_memcpy (void *, const void *, __SIZE_TYPE__,
   __SIZE_TYPE__, __SIZE_TYPE__, int, int)
__GOMP_NOTHROW;
+extern int omp_target_memcpy_async (void *, const void *, __SIZE_TYPE__,
+__SIZE_TYPE__, __SIZE_TYPE__, int, int,
+int, omp_depend_t*)


Formatting, space before *.


Changed.


+  __GOMP_NOTHROW;
  extern int omp_target_memcpy_rect (void *, const void *, __SIZE_TYPE__, int,
const __SIZE_TYPE__ *,
const __SIZE_TYPE__ *,
@@ -279,6 +283,14 @@ extern int omp_target_memcpy_rect (void *, const void *, 
__SIZE_TYPE__, int,
const __SIZE_TYPE__ *,
const __SIZE_TYPE__ *, int, int)
__GOMP_NOTHROW;
+extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__,
+ int, const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *,
+ const __SIZE_TYPE__ *, int, int, int,
+ omp_depend_t*)


Likewise.


Changed.


-int
-omp_target_memcpy (void *dst, const void *src, size_t length,
-   size_t dst_offset, size_t src_offset, int dst_device_num,
-   int src_device_num)
+static int
+omp_target_memcpy_check (void *dst, const void *src, int dst_device_num,
+ int src_device_num,
+ struct gomp_device_descr **dst_devicep,
+ struct gomp_device_descr **src_devicep)
  {


Why does omp_target_memcpy_check need the dst and src arguments?  From what
I can see, they aren't used by it.


Good point, dst and src arguments are removed.


+typedef struct
+{
+  void *dst;
+  const void *src;
+  size_t length;
+  size_t dst_offset;
+  size_t src_offset;
+  struct gomp_device_descr *dst_devicep;
+  struct gomp_device_descr *src_devicep;
+} memcpy_t;


Please come up with some less generic name, struct omp_target_memcpy_data
or something similar.  Even the *_t suffix is problematic, as *_t is
reserved for the implementation.


Renamed "memcpy_t" into "omp_target_memcpy_data" and "memcpy_rect_t" into
"omp_target_memcpy_rect_data".


+
+void
+omp_target_memcpy_async_helper (void *args)


This should be static.


Changed for "omp_target_memcpy_async_helper" and
"omp_target_memcpy_rect_async_helper".


+{
+  memcpy_t *a = args;
+  int ret = omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
+a->src_offset, a->dst_devicep,
+a->src_devicep);
+  if (ret)
+gomp_fatal ("asynchronous memcpy failed");


I'm not really sure killing the whole program if the copying failed is the
best action.  Has it been discussed on omp-lang?  Perhaps the APIs should
have a way how to propagate the result to the caller when it completes
somehow?


I agree that gomp_fatal is quite harsh here. Otherwise I am afraid that
undefined behaviour can result from silently ignoring copy failures. I agree
with Tobias to keep gomp_fatal for now (as I don't see any useful alternative
yet) and discuss a (general) approach for OpenMP (as Tobias triggered in
https://github.com/OpenMP/spec/issues/3286).

As Tobias suggested, I replaced the error messages with "omp_target_memcpy
failed" and "omp_target_memcpy_rect failed".


Even if we do that, the ret variable seems to be superfluos, just do
   if (omp_target_memcpy_copy (...))
 gomp_fatal (...);


Changed.




+{
+  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+
+  int check = omp_target_memcpy_check (dst, src, dst_device_num, 
src_device_num,
+   _devicep, _devicep);
+  if (check)
+return check;
+
+  void (*fn) (void *) = _target_memcpy_async_helper;
+  void *data = NULL;
+  void (*cpyfn) (void *, void *) = NULL;
+  long arg_size = 0;
+  long arg_align = 0;
+  bool if_clause = false;
+  unsigned flags = 0;
+  int priority_arg = 0;
+  void *detach = NULL;
+
+  memcpy_t s = {
+.dst = dst,
+.src = src,
+.length = length,
+.dst_offset = dst_offset,
+.src_offset = src_offset,
+.dst_devicep = dst_devicep,
+.src_devicep = src_devicep
+  };


I think we in libgomp try to use C89 and so declare vars first before other

Re: [PATCH] OpenMP, C++: Add template support for the has_device_addr clause.

2022-05-10 Thread Marcel Vollweiler

Hi Jakub,


diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 0cb17a6..452ecfd 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -8534,11 +8534,14 @@ finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
 {
   if (handle_omp_array_sections (c, ort))
 remove = true;
+  else if (TREE_CODE (TREE_CHAIN (t)) == PARM_DECL)
+t = TREE_CHAIN (t);
   else
 {
   t = OMP_CLAUSE_DECL (c);
   while (TREE_CODE (t) == INDIRECT_REF
- || TREE_CODE (t) == ARRAY_REF)
+ || TREE_CODE (t) == ARRAY_REF
+ || TREE_CODE (t) == NON_LVALUE_EXPR)
 t = TREE_OPERAND (t, 0);
 }
 }


This is wrong.
When processing_template_decl, handle_omp_array_sections often punts, keeps
things as is because if something is dependent, we can't do much about it.
The else if (TREE_CODE (TREE_CHAIN (t)) == PARM_DECL) is obviously wrong,
there is really nothing specific about PARM_DECLs (just that you used
exactly that in the testcase), nor about array section with exactly one
dimension.  What is done elsewhere is look through all TREE_LISTs to find
the base expression, and if that expression is a VAR_DECL/PARM_DECL, nice,
we can do further processing, if processing_template_decl and it is
something different, just defer and otherwise error out.

So I think you want:
--- gcc/cp/semantics.cc.jj2022-05-05 11:56:16.160443828 +0200
+++ gcc/cp/semantics.cc   2022-05-05 15:52:39.651211448 +0200
@@ -8553,14 +8553,23 @@ finish_omp_clauses (tree clauses, enum c
else
  {
t = OMP_CLAUSE_DECL (c);
+   if (TREE_CODE (t) == TREE_LIST)
+ {
+   while (TREE_CODE (t) == TREE_LIST)
+ t = TREE_CHAIN (t);
+ }
while (TREE_CODE (t) == INDIRECT_REF
   || TREE_CODE (t) == ARRAY_REF)
  t = TREE_OPERAND (t, 0);
  }
  }
-   bitmap_set_bit (_on_device_head, DECL_UID (t));
if (VAR_P (t) || TREE_CODE (t) == PARM_DECL)
- cxx_mark_addressable (t);
+ {
+   bitmap_set_bit (_on_device_head, DECL_UID (t));
+   if (!processing_template_decl
+   && !cxx_mark_addressable (t))
+ remove = true;
+ }
goto check_dup_generic_t;

  case OMP_CLAUSE_USE_DEVICE_ADDR:
instead, as I said look through the TREE_LISTs, then only use DECL_UID
on actual VAR_DECLs/PARM_DECLs not random other expressions and
never call cxx_mark_addressable when processing_template_decl (and remove
clause if cxx_mark_addressable fails).
Note, check_dup_generic_t will do among other things:
   if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL
   && (!field_ok || TREE_CODE (t) != FIELD_DECL))
 {
   if (processing_template_decl && TREE_CODE (t) != OVERLOAD)
 break;
... error ...
  }
so with processing_template_decl it will just defer it for later,
but otherwise if t is something invalid it will diagnose it.
But one really shouldn't rely on t being VAR_DECL/PARM_DECL before
that checking is done...

With your pt.cc change and my semantics.cc change, all your new testcases
look fine.


Thank you very much for your detailed explanation. That helped me a lot for my
understanding!
I adjusted the code accordingly.


diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index f570daa..b1bb5be 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -10285,7 +10285,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
 case OMP_CLAUSE_HAS_DEVICE_ADDR:
   decl = OMP_CLAUSE_DECL (c);
   while (TREE_CODE (decl) == INDIRECT_REF
- || TREE_CODE (decl) == ARRAY_REF)
+ || TREE_CODE (decl) == ARRAY_REF
+ || TREE_CODE (decl) == NON_LVALUE_EXPR)
 decl = TREE_OPERAND (decl, 0);
   flags = GOVD_EXPLICIT;
   goto do_add_decl;
@@ -11443,7 +11444,8 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, 
gimple_seq body, tree *list_p,
 case OMP_CLAUSE_HAS_DEVICE_ADDR:
   decl = OMP_CLAUSE_DECL (c);
   while (TREE_CODE (decl) == INDIRECT_REF
- || TREE_CODE (decl) == ARRAY_REF)
+ || TREE_CODE (decl) == ARRAY_REF
+ || TREE_CODE (decl) == NON_LVALUE_EXPR)
 decl = TREE_OPERAND (decl, 0);
   n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
   remove = n == NULL || !(n->value & GOVD_SEEN);
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176ef..30cc9b6 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -1384,7 +1384,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 }
   else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR)
 {
-  if (TREE_CODE (decl) == INDIRECT_REF)
+  if 

[PATCH] OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible.

2022-05-06 Thread Marcel Vollweiler

Hi,

This is a follow up patch of the patch that adds the OpenMP runtime routine
omp_target_is_accessible:

   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591601.html

It considers now also unified shared memory (usm) that was submitted recently
(but not yet approved/committed):
   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591349.html

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Handle unified shared memory in omp_target_is_accessible.

libgomp/ChangeLog:

* target.c (omp_target_is_accessible): Handle unified shared memory.
* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: Updated.
* testsuite/libgomp.fortran/target-is-accessible-1.f90: Updated.
* testsuite/libgomp.c-c++-common/target-is-accessible-2.c: New test.
* testsuite/libgomp.fortran/target-is-accessible-2.f90: New test.

diff --git a/libgomp/target.c b/libgomp/target.c
index 74a031f..e6d00c5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3909,9 +3909,13 @@ omp_target_is_accessible (const void *ptr, size_t size, 
int device_num)
   if (devicep == NULL)
 return false;
 
-  /* TODO: Unified shared memory must be handled when available.  */
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+return true;
 
-  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
+  if (devicep->is_usm_ptr_func && devicep->is_usm_ptr_func ((void *) ptr))
+return true;
+
+  return false;
 }
 
 int
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
index 7c2cf62..e3f494b 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-1.c
@@ -23,23 +23,28 @@ main ()
   if (omp_target_is_accessible (p, sizeof (int), n + 1))
 __builtin_abort ();
 
-  /* Currently, a host pointer is accessible if the device supports shared
- memory or omp_target_is_accessible is executed on the host. This
- test case must be adapted when unified shared memory is avialable.  */
   int a[128];
   for (int d = 0; d <= omp_get_num_devices (); d++)
 {
+  /* SHARED_MEM is 1 if and only if host and device share the same memory.
+OMP_TARGET_IS_ACCESSIBLE should not return 0 for shared memory.  */
   int shared_mem = 0;
   #pragma omp target map (alloc: shared_mem) device (d)
shared_mem = 1;
-  if (omp_target_is_accessible (p, sizeof (int), d) != shared_mem)
+
+  if (shared_mem && !omp_target_is_accessible (p, sizeof (int), d))
+   __builtin_abort ();
+
+  /* USM is disabled by default.  Hence OMP_TARGET_IS_ACCESSIBLE should
+return 0 if shared_mem is false.  */
+  if (!shared_mem && omp_target_is_accessible (p, sizeof (int), d))
__builtin_abort ();
 
-  if (omp_target_is_accessible (a, 128 * sizeof (int), d) != shared_mem)
+  if (shared_mem && !omp_target_is_accessible (a, 128 * sizeof (int), d))
__builtin_abort ();
 
   for (int i = 0; i < 128; i++)
-   if (omp_target_is_accessible ([i], sizeof (int), d) != shared_mem)
+   if (shared_mem && !omp_target_is_accessible ([i], sizeof (int), d))
  __builtin_abort ();
 }
 
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c
new file mode 100644
index 000..24af51f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-is-accessible-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-skip-if "USM is only implemented for nvptx." { ! offload_target_nvptx 
} } */
+
+#include 
+#include 
+
+#pragma omp requires unified_shared_memory
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc (sizeof(int), ompx_unified_shared_mem_alloc);
+  if (!a)
+__builtin_abort ();
+
+  for (int d = 0; d <= omp_get_num_devices (); d++)
+if (!omp_target_is_accessible (a, sizeof (int), d))
+  __builtin_abort ();
+
+  omp_free(a, ompx_unified_shared_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90 
b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
index 2611855..015f74a 100644
--- a/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/target-is-accessible-1.f90
@@ -1,3 +1,5 @@
+! { dg-do run }
+
 program main
   use omp_lib
   use iso_c_binding
@@ -25,24 +27,28 @@ program main
   if (omp_target_is_accessible (p, c_sizeof (d), n + 1) /= 0) &
 stop 4
 
-  ! Currently, a host pointer is accessible if the device supports shared
-  ! memory or omp_target_is_accessible is executed on the host. This
-  ! test case must be adapted when 

Re: [Patch] OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

2022-05-06 Thread Marcel Vollweiler

Hi Jakub,

Am 05.05.2022 um 11:33 schrieb Jakub Jelinek:

On Mon, Mar 14, 2022 at 04:42:14PM +0100, Marcel Vollweiler wrote:

--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
 omp_get_teams_thread_limit_;
  } OMP_5.0.2;

+OMP_5.1.1 {
+  global:
+omp_target_is_accessible;
+} OMP_5.1;
+


You've already added another OMP_5.1.1 symbol, so this hunk will need to be
adjusted.  Keep the names in there alphabetically sorted.


Adjusted.


--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,16 @@
end function omp_target_disassociate_ptr
  end interface

+interface
+  function omp_target_is_accessible (ptr, size, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
+integer(c_int) :: omp_target_is_accessible


The function returning integer(c_int) rather than logical seems like
a screw up in the standard, but too late to fix that :(.


--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3666,6 +3666,24 @@ omp_target_disassociate_ptr (const void *ptr, int 
device_num)
  }

  int
+omp_target_is_accessible (const void *ptr, size_t size, int device_num)
+{
+  if (device_num < 0 || device_num > gomp_get_num_devices ())
+return false;
+
+  if (device_num == gomp_get_num_devices ())
+return true;
+
+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+return false;
+
+  /* TODO: Unified shared memory must be handled when available.  */
+
+  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;


I guess for now it is reasonable, but I wonder if even without
GOMP_OFFLOAD_CAP_SHARED_MEM one can't for CUDA or GCN allocate host
memory (not all, but just some subset) that will be accessible on the
device (I bet that means accessible through the same address on the host and
device, aka partial shared mem).


Currently, I am only aware of

(a) physically shared memory which is used for some architectures where CPU and
GPU are close together (handled via GOMP_OFFLOAD_CAP_SHARED_MEM) and
(b) unified shared memory as being more a logical memory sharing via managed
memory (using sth. like cudaMallocManaged).

For (b) I will submit a follow up patch very soon that depends on the submitted
but not yet approved/committed usm patches:
   https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591349.html



So, ok for trunk.

OT, tried to look how libomptarget implements it and they don't at least
on llvm-project trunk, but while looking at that, noticed that for
omp_target_is_present they do return false from omp_target_is_present
while we return true.  It is unclear if NULL has corresponding storage
on the device (NULL always corresponds to NULL on the device) or not.


That's indeed an interesting point. I am not sure whether returning "true" for a
given NULL pointer is the desired behaviour for omp_target_is_present. For the
host that might be ok (for whatever reason) but for offload devices this implies
that NULL is actually mapped to some address on the device (as far as I
understand the definition):

"The omp_target_is_present routine tests whether a host pointer refers to
storage that is mapped to a given device."

I don't know if such a "NULL mapping" is valid/useful.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[Patch] OpenMP, libgomp, gimple: omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices

2022-04-14 Thread Marcel Vollweiler

Hi,

This patch adds support for omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit on offload devices.

The patch builds on the following patches which are submitted, but not yet
approved/committed:
- [PATCH] OpenMP, libgomp: Environment variable syntax extension.
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588728.html
- [PATCH] OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.
https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591556.html

The OpenMP runtime routines omp_get_max_teams, omp_set_num_teams, and
omp_{gs}et_teams_thread_limit were introduced in OpenMP 5.1 and where already
implemented for the host usage with patch
https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581283.html

The new patch extends the functionality of these OpenMP runtime routines by the
usage also on the device, i.e. device-specific values for nteams-var and
teams-thread-limit-var ICVs can now be retrieved and set also on the device. The
updated number of teams/threads are then used when launching the kernel.

The following main aspects are considered:
(a) Implemented the functions in the according icv-device files.
(b) Added structures to not only store initial device-specific values (they have
to be kept for omp_display_env) but also device-specific ICV values that can be
changed on the device at runtime.
(c) Changed the gimplification:
(c.1) Introduced integer_minus_two_node.
(c.2) For target regions that do not include teams constructs, now the clause
num_teams(-2) is added instead num_teams(1). This was necessary as num_teams(1)
is ambigious: it can also mean that a teams construct with explicit num_teams(1)
clause was specified inside the target region. The disambiguation is needed in
order to choose the correct thread limit: teams-thread-limit-var is only
intended for teams constructs such that if there is no teams construct, then the
number of threads is limited by thread-limit-var.
(d) Extend GOMP_target_ext. The host needs to set the device-specific ICV values
before the kernel is launched. The number of teams and threads are members of
the args list and are modified when no value was specified in an explicit clause
and the computation of the value was not postponed due to mapped variables.
(d.1) The arguments list is copied in order to guarantee immutability.
(e) Added copy back mechanism for ICVs which are modified on the device. The
only way to change device-specific ICVs is to do it on the device. As the
device-specific values are sometimes needed also on the host when the kernel is
launched (particularly number of teams and threads) they have to be copied back.

The patch was tested on x86_64-linux with nvptx and gcn offloading. All with no
regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
gcc/ChangeLog:

* gimplify.cc (optimize_target_teams): Changed integer_one_node to
integer_minus_two_node in case of non-existing teams construct in target
region due to disambiguation. Previously, num_teams(1) was used as
clause on the target construct when (a) no teams construct exists in the
target region or (b) a teams construct with explicit num_teams(1)
clause was specified.
* tree-core.h (enum tree_index): Added TI_INTEGER_MINUS_TWO.
* tree.cc (build_common_tree_nodes): Added integer_minus_two_node.
* tree.h (integer_minus_two_node): Likewise.

libgomp/ChangeLog:

* config/gcn/icv-device.c (omp_set_num_teams): Added.
(omp_get_teams_thread_limit): Added.
(omp_set_teams_thread_limit): Added.
(ialias): Added for omp_set_num_teams and omp_{gs}et_teams_thread_limit.
* config/nvptx/icv-device.c (omp_set_num_teams): Likewise.
(omp_get_teams_thread_limit): Likewise.
(omp_set_teams_thread_limit): Likewise.
(ialias): Likewise.
* env.c (struct gomp_default_icv_t): Added to hold default ICV values.
(struct gomp_icv_list): Removed static.
(omp_display_env): Renaming of used lists.
(add_device_specific_icv): Removed static.
(gomp_add_device_specific_icv): Removed static.
(parse_device_specific): Renaming of used lists and added storing of
parsed values in lists of modifiable ICV values. 
* icv-device.c (omp_set_num_teams): Added.
(ialias): Added for omp_set_num_teams and omp_{gs}et_teams_thread_limit.
(omp_get_teams_thread_limit): Added.
(omp_set_teams_thread_limit): Added.
* icv.c (omp_set_num_teams): Removed.
(omp_set_teams_thread_limit): Removed.
(omp_get_teams_thread_limit): Removed.
(ialias): Removed for omp_set_num_teams and
omp_{gs}et_teams_thread_limit.
* libgomp-plugin.h 

Re: [PATCH] OpenMP, Fortran: Bugfix for omp_set_num_teams.

2022-03-16 Thread Marcel Vollweiler

Hi Jakub,


! { dg-do run }
! { dg-additional-options "-fdefault-integer-8" }

program set_num_teams_8
   use omp_lib
   omp_set_num_teams (42)
   if (omp_get_num_teams () .ne. 42) stop 1
end program


I modified your suggested test case a bit:

program set_num_teams_8
  use omp_lib
  use, intrinsic :: iso_fortran_env
  integer(int64) :: x
  x = 42
  call omp_set_num_teams (x)
  if (omp_get_max_teams () .ne. 42) stop 1
end program

I tested it with/without the fix and the test passed/failed as expected.

Hope, that's ok?

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, Fortran: Bugfix for omp_set_num_teams.

This patch fixes a small bug in the omp_set_num_teams implementation.

libgomp/ChangeLog:

* fortran.c (omp_set_num_teams_8_): Fix bug.
* testsuite/libgomp.fortran/icv-8.f90: New test.

diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 8c1cfd1..d984ce5 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -491,7 +491,7 @@ omp_set_num_teams_ (const int32_t *num_teams)
 void
 omp_set_num_teams_8_ (const int64_t *num_teams)
 {
-  omp_set_max_active_levels (TO_INT (*num_teams));
+  omp_set_num_teams (TO_INT (*num_teams));
 }
 
 int32_t
diff --git a/libgomp/testsuite/libgomp.fortran/icv-8.f90 
b/libgomp/testsuite/libgomp.fortran/icv-8.f90
new file mode 100644
index 000..9478c15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/icv-8.f90
@@ -0,0 +1,10 @@
+! This tests 'set_num_teams_8' function.
+
+program set_num_teams_8
+  use omp_lib
+  use, intrinsic :: iso_fortran_env
+  integer(int64) :: x
+  x = 42
+  call omp_set_num_teams (x)
+  if (omp_get_max_teams () .ne. 42) stop 1
+end program


[PATCH] OpenMP, Fortran: Bugfix for omp_set_num_teams.

2022-03-15 Thread Marcel Vollweiler

Hi,

This patch fixes a small bug for omp_set_num_teams in fortran.c.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, Fortran: Bugfix for omp_set_num_teams.

This patch fixes a small bug in the omp_set_num_teams implementation.

libgomp/ChangeLog:

* fortran.c (omp_set_num_teams_8_): Fix bug.

diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index 8c1cfd1..d984ce5 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -491,7 +491,7 @@ omp_set_num_teams_ (const int32_t *num_teams)
 void
 omp_set_num_teams_8_ (const int64_t *num_teams)
 {
-  omp_set_max_active_levels (TO_INT (*num_teams));
+  omp_set_num_teams (TO_INT (*num_teams));
 }
 
 int32_t


Re: [Patch] OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

2022-03-14 Thread Marcel Vollweiler

Hi Tobias,


Minor remark to the test:

On 11.03.22 13:30, Marcel Vollweiler wrote:

+  int d = omp_get_default_device ();

...

+  int shared_mem = 0;
+  #pragma omp target map (alloc: shared_mem) device (d)
+shared_mem = 1;
+  if (omp_target_is_accessible (p, sizeof (int), d) != shared_mem)
+__builtin_abort ();


I wonder whether it makes sense to do instead
   for (d = 0; d <= omp_get_num_devices(); ++d)
instead of just
   d = omp_get_default_device();
given that we have already found once in a while bugs when testing more
than just the default device - be it because devices differed or because
'0' was special.

In particular, I could image having at the same time two or three devices
available of type intelmic + gcn + nvptx, possibly mixing shared memory,
nonshared memory and semi-shared memory*


Good hint, thanks. I updated the C(++) and Fortran tests accordingly and
attached the updated patch.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

gcc/ChangeLog:

* omp-low.cc (omp_runtime_api_call): Added target_is_accessible to
omp_runtime_apis array.

libgomp/ChangeLog:

* libgomp.map: Added omp_target_is_accessible.
* libgomp.texi: Tagged omp_target_is_accessible as supported.
* omp.h.in: Added omp_target_is_accessible.
* omp_lib.f90.in: Added interface for omp_target_is_accessible.
* omp_lib.h.in: Likewise.
* target.c (omp_target_is_accessible): Added implementation of
omp_target_is_accessible.
* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: New test.
* testsuite/libgomp.fortran/target-is-accessible-1.f90: New test.

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176ef..bf38fad 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -3959,6 +3959,7 @@ omp_runtime_api_call (const_tree fndecl)
   "target_associate_ptr",
   "target_disassociate_ptr",
   "target_free",
+  "target_is_accessible",
   "target_is_present",
   "target_memcpy",
   "target_memcpy_rect",
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..1764380 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
omp_get_teams_thread_limit_;
 } OMP_5.0.2;
 
+OMP_5.1.1 {
+  global:
+   omp_target_is_accessible;
+} OMP_5.1;
+
 GOMP_1.0 {
   global:
GOMP_atomic_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 161a423..58e432c 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -311,7 +311,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_set_num_teams}, @code{omp_set_teams_thread_limit},
   @code{omp_get_max_teams}, @code{omp_get_teams_thread_limit} runtime
   routines @tab Y @tab
-@item @code{omp_target_is_accessible} runtime routine @tab N @tab
+@item @code{omp_target_is_accessible} runtime routine @tab Y @tab
 @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
   runtime routines @tab N @tab
 @item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 89c5d65..1ec7415 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -282,6 +282,8 @@ extern int omp_target_memcpy_rect (void *, const void *, 
__SIZE_TYPE__, int,
 extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 __SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
+extern int omp_target_is_accessible (const void *, __SIZE_TYPE__, int)
+  __GOMP_NOTHROW;
 
 extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW;
 extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__)
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index daf40dc..f369507 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,16 @@
   end function omp_target_disassociate_ptr
 end interface
 
+interface
+  function omp_target_is_accessible (ptr, size, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
+integer(c_int) :: omp_target_is_accessible
+type(c_ptr), value :: ptr
+integer(c_size_t), value :: size
+integer(c_int), value :: device_num
+  end function omp_target_is_accessible
+end interface
+
 #if _OPENMP >= 201811
 !GCC$ ATTRIBUTES DEPRECATED :: omp_get_nested, omp_set_nested
 #endif
diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in
index ff857a4..5ea0366 100644
--- a/libgomp/omp

[Patch] OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

2022-03-11 Thread Marcel Vollweiler

Hi,

This patch adds the OpenMP runtime routine "omp_target_is_accessible" which was
introduced in OpenMP 5.1 (specification section 3.8.4):

"The omp_target_is_accessible routine tests whether host memory is accessible
from a given device."

"This routine returns true if the storage of size bytes starting at the address
given by ptr is accessible from device device_num. Otherwise, it returns false."

"The value of ptr must be a valid host pointer or NULL (or C_NULL_PTR, for
Fortran). The device_num argument must be greater than or equal to zero and less
than or equal to the result of omp_get_num_devices()."

"When called from within a target region the effect is unspecified."

Currently, the only way of accessing host memory on a non-host device is via
shared memory. This will change with unified shared memory (usm) that was
recently submitted but not yet approved/committed. A follow-up patch for
omp_target_is_accessible is planned considering usm when available. The current
patch handles the basic implementation for C/C++ and Fortran and includes
comments pointing to usm.

Although not explicitly specified in the OpenMP 5.1 standard, the implemented
function returns "true" if the given device_num is equal to
"omp_get_num_devices" (i.e. the host) as it is expected that host memory can be
accessed from the host device.

The patch was tested on x86_64-linux and PowerPC, both with nvptx offloading.
All with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routine omp_target_is_accessible.

gcc/ChangeLog:

* omp-low.cc (omp_runtime_api_call): Added target_is_accessible to
omp_runtime_apis array.

libgomp/ChangeLog:

* libgomp.map: Added omp_target_is_accessible.
* libgomp.texi: Tagged omp_target_is_accessible as supported.
* omp.h.in: Added omp_target_is_accessible.
* omp_lib.f90.in: Added interface for omp_target_is_accessible.
* omp_lib.h.in: Likewise.
* target.c (omp_target_is_accessible): Added implementation of
omp_target_is_accessible.
* testsuite/libgomp.c-c++-common/target-is-accessible-1.c: New test.
* testsuite/libgomp.fortran/target-is-accessible-1.f90: New test.

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176ef..bf38fad 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -3959,6 +3959,7 @@ omp_runtime_api_call (const_tree fndecl)
   "target_associate_ptr",
   "target_disassociate_ptr",
   "target_free",
+  "target_is_accessible",
   "target_is_present",
   "target_memcpy",
   "target_memcpy_rect",
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..1764380 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
omp_get_teams_thread_limit_;
 } OMP_5.0.2;
 
+OMP_5.1.1 {
+  global:
+   omp_target_is_accessible;
+} OMP_5.1;
+
 GOMP_1.0 {
   global:
GOMP_atomic_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 161a423..58e432c 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -311,7 +311,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_set_num_teams}, @code{omp_set_teams_thread_limit},
   @code{omp_get_max_teams}, @code{omp_get_teams_thread_limit} runtime
   routines @tab Y @tab
-@item @code{omp_target_is_accessible} runtime routine @tab N @tab
+@item @code{omp_target_is_accessible} runtime routine @tab Y @tab
 @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
   runtime routines @tab N @tab
 @item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 89c5d65..1ec7415 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -282,6 +282,8 @@ extern int omp_target_memcpy_rect (void *, const void *, 
__SIZE_TYPE__, int,
 extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 __SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
+extern int omp_target_is_accessible (const void *, __SIZE_TYPE__, int)
+  __GOMP_NOTHROW;
 
 extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW;
 extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__)
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index daf40dc..f369507 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,16 @@
   end function omp_target_disassociate_ptr
 end interface
 
+interface
+  function omp_target_is_accessible (ptr, size, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
+

Re: [PATCH] OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

2022-03-10 Thread Marcel Vollweiler

Hi Jakub,

This is an update to the patch from Tue Mar 8:

https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591343.html

I just added "get_mapped_ptr" to the "omp_runtime_apis" array in omp-low.cc and
replaced "omp_get_num_devices" by "gomp_get_num_devices" in target.c.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

gcc/ChangeLog:

* omp-low.cc (omp_runtime_api_call): Added get_mapped_ptr to
omp_runtime_apis array.

libgomp/ChangeLog:

* libgomp.map: Added omp_get_mapped_ptr.
* libgomp.texi: Tagged omp_get_mapped_ptr as supported.
* omp.h.in: Added omp_get_mapped_ptr.
* omp_lib.f90.in: Added interface for omp_get_mapped_ptr.
* omp_lib.h.in: Likewise.
* target.c (omp_get_mapped_ptr): Added implementation of
omp_get_mapped_ptr.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-2.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-3.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-4.c: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-1.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-2.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-3.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-4.f90: New test.

diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176ef..02a0f72 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -3962,6 +3962,7 @@ omp_runtime_api_call (const_tree fndecl)
   "target_is_present",
   "target_memcpy",
   "target_memcpy_rect",
+  "get_mapped_ptr",
   NULL,
   /* Now omp_* calls that are available as omp_* and omp_*_; however, the
 DECL_NAME is always omp_* without tailing underscore.  */
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..608a54c 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
omp_get_teams_thread_limit_;
 } OMP_5.0.2;
 
+OMP_5.1.1 {
+  global:
+   omp_get_mapped_ptr;
+} OMP_5.1;
+
 GOMP_1.0 {
   global:
GOMP_atomic_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 161a423..c163b56 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -314,7 +314,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_target_is_accessible} runtime routine @tab N @tab
 @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
   runtime routines @tab N @tab
-@item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
+@item @code{omp_get_mapped_ptr} runtime routine @tab Y @tab
 @item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and
   @code{omp_aligned_calloc} runtime routines @tab Y @tab
 @item @code{omp_alloctrait_key_t} enum: @code{omp_atv_serialized} added,
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 89c5d65..18d0152 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -282,6 +282,7 @@ extern int omp_target_memcpy_rect (void *, const void *, 
__SIZE_TYPE__, int,
 extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 __SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
+extern void *omp_get_mapped_ptr (const void *, int) __GOMP_NOTHROW;
 
 extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW;
 extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__)
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index daf40dc..506f15c 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,15 @@
   end function omp_target_disassociate_ptr
 end interface
 
+interface
+  function omp_get_mapped_ptr (ptr, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_int
+type(c_ptr) :: omp_get_mapped_ptr
+type(c_ptr), value :: ptr
+integer(c_int), value :: device_num
+  end function omp_get_mapped_ptr
+end interface
+
 #if _OPENMP >= 201811
 !GCC$ ATTRIBUTES DEPRECATED :: omp_get_nested, omp_set_nested
 #endif
diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in
index ff857a4..0f48510 100644
--- a/libgomp/omp_lib.h.in
+++ b/libgomp/omp_lib.h.in
@@ -416,3 +416,12 @@
   integer(c_int), value :: device_num
 end function omp_target_disassociate_ptr
   end interface
+
+  interface
+function omp_get_mapped_ptr (ptr, device_num) bind(c)
+  use, intrinsic :: iso_c_binding, only : c_ptr, c_int
+  type(c_ptr) :: omp_get_mapped_ptr
+  

Re: [PATCH] OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

2022-03-08 Thread Marcel Vollweiler

Hi Jakub,


diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..00a4858 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -224,6 +224,7 @@ OMP_5.1 {
 omp_set_teams_thread_limit_8_;
 omp_get_teams_thread_limit;
 omp_get_teams_thread_limit_;
+omp_get_mapped_ptr;
  } OMP_5.0.2;


I think it is too late for this to be targetted for GCC 12, and
for GCC 13 it will need to go into OMP_5.1.1 symver.


Agreed and changed accordingly.


+void *
+omp_get_mapped_ptr (const void *ptr, int device_num)
+{
+  if (device_num < 0 || device_num > omp_get_num_devices ())
+return NULL;
+
+  if (device_num == omp_get_initial_device ())
+return (void*)ptr;


Space before * and space after )


Changed.


+  struct gomp_device_descr *devicep = resolve_device (device_num);
+  if (devicep == NULL)
+return NULL;
+
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+  || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+return (void*)ptr;


Likewise.


Changed.


+
+  gomp_mutex_lock (>lock);
+
+  struct splay_tree_s *mem_map = >mem_map;
+  struct splay_tree_key_s cur_node;
+  void *ret = NULL;
+  uintptr_t offset = 0;


offset should be moved to the only place that defines it.


Changed.


+
+  cur_node.host_start = (uintptr_t) ptr;
+  cur_node.host_end = cur_node.host_start;
+  splay_tree_key n = gomp_map_0len_lookup (mem_map, _node);
+
+  if (n && n->host_start == cur_node.host_start)
+{
+  ret = (void*) n->tgt->tgt_start + n->tgt_offset;
+}


Single statement body, so without {}s and reindented, space before *.

+  else if (n)
+{
+  offset = cur_node.host_start - n->host_start;

   uintptr_t offset = cur_node.host_start - n->host_start;


+  ret = (void*) n->tgt->tgt_start + n->tgt_offset + offset;


Space before *.

Though, looking at this more, what is the point of the first if?
The second if would compute offset = 0...


Absolutely true :)
Changed.



Also, void * arithmetics is a GNU extension, maybe better use char *.


I changed it to (enclosing parentheses):

   ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);

i.e. pointer arithmetic is done on uintptr_t, but I am not completely sure if
that's sufficient in terms of compatibility. On the other hand,

   ret = (void *) ((char *) n->tgt->tgt_start + (char *) n->tgt_offset + 
(char *)
offset);

is perhaps overcomplicated if not really necessary. What do you think?


+  if (omp_get_mapped_ptr (q, -1) != NULL)
+__builtin_abort ();


When you do include stdlib.h, what is the point of using __builtin_abort ?
Just use abort then.


Good point. Changed.

Thanks,

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

libgomp/ChangeLog:

* libgomp.map: Added omp_get_mapped_ptr.
* libgomp.texi: Tagged omp_get_mapped_ptr as supported.
* omp.h.in: Added omp_get_mapped_ptr.
* omp_lib.f90.in: Added interface for omp_get_mapped_ptr.
* omp_lib.h.in: Likewise.
* target.c (omp_get_mapped_ptr): Added implementation of
omp_get_mapped_ptr.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-2.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-3.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-4.c: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-1.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-2.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-3.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-4.f90: New test.

diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..608a54c 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -226,6 +226,11 @@ OMP_5.1 {
omp_get_teams_thread_limit_;
 } OMP_5.0.2;
 
+OMP_5.1.1 {
+  global:
+   omp_get_mapped_ptr;
+} OMP_5.1;
+
 GOMP_1.0 {
   global:
GOMP_atomic_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 161a423..c163b56 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -314,7 +314,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_target_is_accessible} runtime routine @tab N @tab
 @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
   runtime routines @tab N @tab
-@item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
+@item @code{omp_get_mapped_ptr} runtime routine @tab Y @tab
 @item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and
   @code{omp_aligned_calloc} runtime routines @tab Y @tab
 @item @code{omp_alloctrait_key_t} enum: @code{omp_atv_serialized} added,
diff --git 

[PATCH] OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

2022-03-04 Thread Marcel Vollweiler

Hi,

This patch adds the OpenMP runtime routine "omp_get_mapped_ptr" which was
introduced in OpenMP 5.1 (specification section 3.8.11):

"The omp_get_mapped_ptr routine returns the device pointer that is associated
with a host pointer for a given device."

"The device_num argument must be greater than or equal to zero and less than or
equal to the result of omp_get_num_devices()."

"A call to this routine for a pointer that is not NULL (or C_NULL_PTR, for
Fortran) and does not have an associated pointer on the given device results in
a NULL pointer."

"The routine returns NULL (or C_NULL_PTR, for Fortran) if unsuccessful.
Otherwise it returns the device pointer, which is ptr if device_num is the value
returned by omp_get_initial_device()."

Implementation and tests were added for C/C++ and Fortran.

There is a small inconvenience considering zero-length arrays as list items of
the "target map" construct: it seems that zero-length arrays are not associated
correctly there, such that omp_get_mapped_ptr returns NULL instead of the
associated device pointer - in contrast to the situation where a device pointer
is associated with the host pointer via omp_target_associate_ptr.
However, the result for omp_get_mapped_ptr is consistent with
omp_target_is_present (which returns 0, i.e. "not present") in this situation.

The patch was tested on x86_64-linux with nvptx and amdgcn offloading. All with
no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routine omp_get_mapped_ptr.

libgomp/ChangeLog:

* libgomp.map: Added omp_get_mapped_ptr.
* libgomp.texi: Tagged omp_get_mapped_ptr as supported.
* omp.h.in: Added omp_get_mapped_ptr.
* omp_lib.f90.in: Added interface for omp_get_mapped_ptr.
* omp_lib.h.in: Likewise.
* target.c (omp_get_mapped_ptr): Added implementation of
omp_get_mapped_ptr.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-1.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-2.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-3.c: New test.
* testsuite/libgomp.c-c++-common/get-mapped-ptr-4.c: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-1.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-2.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-3.f90: New test.
* testsuite/libgomp.fortran/get-mapped-ptr-4.f90: New test.

diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 2ac5809..00a4858 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -224,6 +224,7 @@ OMP_5.1 {
omp_set_teams_thread_limit_8_;
omp_get_teams_thread_limit;
omp_get_teams_thread_limit_;
+   omp_get_mapped_ptr;
 } OMP_5.0.2;
 
 GOMP_1.0 {
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 161a423..c163b56 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -314,7 +314,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{omp_target_is_accessible} runtime routine @tab N @tab
 @item @code{omp_target_memcpy_async} and @code{omp_target_memcpy_rect_async}
   runtime routines @tab N @tab
-@item @code{omp_get_mapped_ptr} runtime routine @tab N @tab
+@item @code{omp_get_mapped_ptr} runtime routine @tab Y @tab
 @item @code{omp_calloc}, @code{omp_realloc}, @code{omp_aligned_alloc} and
   @code{omp_aligned_calloc} runtime routines @tab Y @tab
 @item @code{omp_alloctrait_key_t} enum: @code{omp_atv_serialized} added,
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index 89c5d65..18d0152 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -282,6 +282,7 @@ extern int omp_target_memcpy_rect (void *, const void *, 
__SIZE_TYPE__, int,
 extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__,
 __SIZE_TYPE__, int) __GOMP_NOTHROW;
 extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW;
+extern void *omp_get_mapped_ptr (const void *, int) __GOMP_NOTHROW;
 
 extern void omp_set_affinity_format (const char *) __GOMP_NOTHROW;
 extern __SIZE_TYPE__ omp_get_affinity_format (char *, __SIZE_TYPE__)
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index daf40dc..506f15c 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -835,6 +835,15 @@
   end function omp_target_disassociate_ptr
 end interface
 
+interface
+  function omp_get_mapped_ptr (ptr, device_num) bind(c)
+use, intrinsic :: iso_c_binding, only : c_ptr, c_int
+type(c_ptr) :: omp_get_mapped_ptr
+type(c_ptr), value :: ptr
+integer(c_int), value :: device_num
+  end function omp_get_mapped_ptr
+

[PATCH] OpenMP, C++: Add template support for the has_device_addr clause.

2022-02-23 Thread Marcel Vollweiler

Hi,

The patch for adding the has_device_addr clause on the target construct was
recently committed (bbb7f8604e1dfc08f44354cfd93d2287f2fdd489).

Additionally, this patch adds support for list items in the has_device_addr
clause which type is given by C++ template parameters.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, C++: Add template support for the has_device_addr clause.

gcc/cp/ChangeLog:

* pt.cc (tsubst_omp_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR.
* semantics.cc (finish_omp_clauses): Handle PARM_DECL and
NON_LVALUE_EXPR.

gcc/ChangeLog:

* gimplify.cc (gimplify_scan_omp_clauses): Handle NON_LVALUE_EXPR.
(gimplify_adjust_omp_clauses): Likewise.
* omp-low.cc (scan_sharing_clauses): Likewise.
(lower_omp_target): Likewise.

libgomp/ChangeLog:

* testsuite/libgomp.c++/target-has-device-addr-7.C: New test.
* testsuite/libgomp.c++/target-has-device-addr-8.C: New test.
* testsuite/libgomp.c++/target-has-device-addr-9.C: New test.

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 6dda660..86446d7 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -17652,6 +17652,7 @@ tsubst_omp_clauses (tree clauses, enum 
c_omp_region_type ort,
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_USE_DEVICE_ADDR:
case OMP_CLAUSE_IS_DEVICE_PTR:
+   case OMP_CLAUSE_HAS_DEVICE_ADDR:
case OMP_CLAUSE_INCLUSIVE:
case OMP_CLAUSE_EXCLUSIVE:
  OMP_CLAUSE_DECL (nc)
@@ -17797,6 +17798,7 @@ tsubst_omp_clauses (tree clauses, enum 
c_omp_region_type ort,
  case OMP_CLAUSE_USE_DEVICE_PTR:
  case OMP_CLAUSE_USE_DEVICE_ADDR:
  case OMP_CLAUSE_IS_DEVICE_PTR:
+ case OMP_CLAUSE_HAS_DEVICE_ADDR:
  case OMP_CLAUSE_INCLUSIVE:
  case OMP_CLAUSE_EXCLUSIVE:
  case OMP_CLAUSE_ALLOCATE:
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 0cb17a6..452ecfd 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -8534,11 +8534,14 @@ finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
{
  if (handle_omp_array_sections (c, ort))
remove = true;
+ else if (TREE_CODE (TREE_CHAIN (t)) == PARM_DECL)
+   t = TREE_CHAIN (t);
  else
{
  t = OMP_CLAUSE_DECL (c);
  while (TREE_CODE (t) == INDIRECT_REF
-|| TREE_CODE (t) == ARRAY_REF)
+|| TREE_CODE (t) == ARRAY_REF
+|| TREE_CODE (t) == NON_LVALUE_EXPR)
t = TREE_OPERAND (t, 0);
}
}
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index f570daa..b1bb5be 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -10285,7 +10285,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
case OMP_CLAUSE_HAS_DEVICE_ADDR:
  decl = OMP_CLAUSE_DECL (c);
  while (TREE_CODE (decl) == INDIRECT_REF
-|| TREE_CODE (decl) == ARRAY_REF)
+|| TREE_CODE (decl) == ARRAY_REF
+|| TREE_CODE (decl) == NON_LVALUE_EXPR)
decl = TREE_OPERAND (decl, 0);
  flags = GOVD_EXPLICIT;
  goto do_add_decl;
@@ -11443,7 +11444,8 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, 
gimple_seq body, tree *list_p,
case OMP_CLAUSE_HAS_DEVICE_ADDR:
  decl = OMP_CLAUSE_DECL (c);
  while (TREE_CODE (decl) == INDIRECT_REF
-|| TREE_CODE (decl) == ARRAY_REF)
+|| TREE_CODE (decl) == ARRAY_REF
+|| TREE_CODE (decl) == NON_LVALUE_EXPR)
decl = TREE_OPERAND (decl, 0);
  n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
  remove = n == NULL || !(n->value & GOVD_SEEN);
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176ef..30cc9b6 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -1384,7 +1384,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
}
  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR)
{
- if (TREE_CODE (decl) == INDIRECT_REF)
+ if (TREE_CODE (decl) == INDIRECT_REF
+ || TREE_CODE (decl) == NON_LVALUE_EXPR)
decl = TREE_OPERAND (decl, 0);
  install_var_field (decl, true, 3, ctx);
}
@@ -1747,7 +1748,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR)
{
  while (TREE_CODE (decl) == INDIRECT_REF
-|| TREE_CODE (decl) == ARRAY_REF)
+|| TREE_CODE (decl) == ARRAY_REF
+|| 

[PATCH] OpenMP, libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async

2022-02-21 Thread Marcel Vollweiler

Hi,

This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and
omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as
asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect.

In contrast to the synchronous variants, the asynchronous functions have two
additional function parameters to allow the specification of task dependences:

   int depobj_count
   omp_depend_t *depobj_list

   integer(c_int), value :: depobj_count
   integer(omp_depend_kind), optional :: depobj_list(*)

The implementation splits the synchronous functions into two parts: (a) check
and (b) copy. Then (a) is used in the asynchronous functions for the sequential
part, and the actual copy process (b) is executed in a new created task. The
sequential part (a) takes into account the requirements for the return values:

"The routine returns zero if successful. Otherwise, it returns a non-zero
value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7)

"An application can determine the number of inclusive dimensions supported by an
implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both
dst and src. The routine returns the number of dimensions supported by the
implementation for the specified device numbers. No copy operation is
performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8)

Due to asynchronicity an error is thrown if the asynchronous memcpy is not
successful (in contrast to the synchronous functions which use a return
value unequal to zero).

The patch was tested on x86_64-linux with nvptx and amdgcn offloading and with
PowerPC with nvptx offloading. All with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Add new runtime routines omp_target_memcpy_async and
omp_target_memcpy_rect_async.

This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and
omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as
asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect.

In contrast to the synchronous variants, the asynchronous functions have two
additional function parameters to allow the specification of task dependences:

int depobj_count
omp_depend_t *depobj_list

integer(c_int), value :: depobj_count
integer(omp_depend_kind), optional :: depobj_list(*)

The implementation splits the synchronous functions into two parts: (a) check
and (b) copy. Then (a) is used in the asynchronous functions for the sequential
part, and the actual copy process (b) is executed in a new created task. The
sequential part (a) takes into account the requirements for the return values: 

"The routine returns zero if successful. Otherwise, it returns a non-zero
value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7)

"An application can determine the number of inclusive dimensions supported by an
implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both
dst and src. The routine returns the number of dimensions supported by the
implementation for the specified device numbers. No copy operation is
performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8)

Due to asynchronicity an error is thrown if the asynchronous memcpy is not
successful (in contrast to the synchronous functions which use a return
value unequal to zero).

gcc/ChangeLog:

* omp-low.cc (omp_runtime_api_call): Added target_memcpy_async and
target_memcpy_rect_async to omp_runtime_apis array.

libgomp/ChangeLog:

* libgomp.map: Added omp_target_memcpy_async and
omp_target_memcpy_rect_async.
* libgomp.texi: Both functions are now supported.
* omp.h.in: Added omp_target_memcpy_async and
omp_target_memcpy_rect_async.
* omp_lib.f90.in: Added interfaces for both new functions.
* omp_lib.h.in: Likewise.
* target.c (omp_target_memcpy): Restructured into check and copy part.
(omp_target_memcpy_check): New helper function for omp_target_memcpy and
omp_target_memcpy_async that checks requirements.
(omp_target_memcpy_copy): New helper function for omp_target_memcpy and
omp_target_memcpy_async that performs the memcpy.
(omp_target_memcpy_async_helper): New helper function that is used in
omp_target_memcpy_async for the asynchronous task.
(omp_target_memcpy_async): Added.
(omp_target_memcpy_rect): Restructured into check and copy part.
(omp_target_memcpy_rect_check): New helper function for
omp_target_memcpy_rect and omp_target_memcpy_rect_async that checks
requirements.
(omp_target_memcpy_rect_copy): New helper function for

Re: [PATCH] C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct

2022-02-02 Thread Marcel Vollweiler

Hi Jakub,


+case OMP_CLAUSE_HAS_DEVICE_ADDR:
+  t = OMP_CLAUSE_DECL (c);
+  if (TREE_CODE (t) == TREE_LIST)
+{
+  if (handle_omp_array_sections (c, ort))
+remove = true;
+  else
+{
+  t = OMP_CLAUSE_DECL (c);
+  while (TREE_CODE (t) == ARRAY_REF)
+t = TREE_OPERAND (t, 0);
+}
+}
+  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR)
+bitmap_set_bit (_on_device_head, DECL_UID (t));


Why the OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR check?
There is no goto into this block nor fallthru into it, and
handle_omp_array_sections better shouldn't change OMP_CLAUSE_CODE.


Good point. Removed.




   goto check_dup_generic;

+case OMP_CLAUSE_HAS_DEVICE_ADDR:
+  t = OMP_CLAUSE_DECL (c);
+  if (TREE_CODE (t) == TREE_LIST)
+if (handle_omp_array_sections (c, ort))
+  remove = true;
+else
+  {
+t = OMP_CLAUSE_DECL (c);
+while (TREE_CODE (t) == ARRAY_REF)
+  t = TREE_OPERAND (t, 0);
+  }
+  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR)
+bitmap_set_bit (_on_device_head, DECL_UID (t));


Likewise.


Removed.




+  if (VAR_P (t) || TREE_CODE (t) == PARM_DECL)
+cxx_mark_addressable (t);
+  goto check_dup_generic_t;
+
 case OMP_CLAUSE_USE_DEVICE_ADDR:
   field_ok = true;
   t = OMP_CLAUSE_DECL (c);



--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1391,7 +1391,8 @@ enum
OMP_LIST_USE_DEVICE_PTR,
OMP_LIST_USE_DEVICE_ADDR,
OMP_LIST_NONTEMPORAL,
-  OMP_LIST_NUM
+  OMP_LIST_HAS_DEVICE_ADDR,
+  OMP_LIST_NUM  /* must be the last  */


Capital M and . at the end.


Changed.




@@ -2077,6 +2078,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
 }
   break;
 case 'h':
+  if ((mask & OMP_CLAUSE_HAS_DEVICE_ADDR)
+  && gfc_match_omp_variable_list
+   ("has_device_addr (",
+>lists[OMP_LIST_HAS_DEVICE_ADDR], false, NULL, NULL,
+ true) == MATCH_YES)


Formatting, true should be IMO below >lists.


Corrected the formatting.




+continue;
   if ((mask & OMP_CLAUSE_HINT)
   && (m = gfc_match_dupl_check (!c->hint, "hint", true, >hint))
  != MATCH_NO)
@@ -2850,7 +2857,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
   if ((mask & OMP_CLAUSE_USE_DEVICE_ADDR)
   && gfc_match_omp_variable_list
("use_device_addr (",
->lists[OMP_LIST_USE_DEVICE_ADDR], false) == MATCH_YES)
+>lists[OMP_LIST_USE_DEVICE_ADDR], false, NULL, NULL,
+ true) == MATCH_YES)


Likewise.


Corrected.




--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1910,7 +1910,17 @@ gfc_trans_omp_variable_list (enum omp_clause_code code,
 tree t = gfc_trans_omp_variable (namelist->sym, declare_simd);
 if (t != error_mark_node)
   {
-tree node = build_omp_clause (input_location, code);
+tree node;
+/* For HAS_DEVICE_ADDR of an array descriptor, firstprivatize the
+   descriptor such that the bounds are available; its data component
+   is unmodified; it is handled as device address inside target. */
+if (code == OMP_CLAUSE_HAS_DEVICE_ADDR
+&& (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (t))
+|| (POINTER_TYPE_P (TREE_TYPE (t))
+&& GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (t))
+  node = build_omp_clause (input_location, OMP_CLAUSE_FIRSTPRIVATE);


Not sure about the above,


This is needed for allocatable arrays and array pointers to ensure that
not only the (array) data is (already) present on the device but also
the array descriptor. Otherwise the test cases
target-has-device-addr-2.f90, target-has-device-addr-3.f90 (because of
variable "c") and target-has-device-addr-4.f90 (also because of variable
"c") won't work.




--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -10024,6 +10024,15 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
*pre_p,
   flags = GOVD_EXPLICIT;
   goto do_add;

+case OMP_CLAUSE_HAS_DEVICE_ADDR:
+  decl = OMP_CLAUSE_DECL (c);
+  if (TREE_CODE (decl) == ARRAY_REF)
+{
+  flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT;
+  while (TREE_CODE (decl) == ARRAY_REF)
+decl = TREE_OPERAND (decl, 0);
+  goto do_add_decl;


but this looks weird.
If decl after stripping the ARRAY_REFs is a var with pointer type, sure,
firstprivatizing it is the way to go.
But it can be also a variable with ARRAY_TYPE, can't it?  Something like:
   int a[64];
   #pragma omp target data map(a) use_device_addr(a)
   {
 #pragma omp target has_device_addr(a[3:16])
 a[3] = 1;
   }
and in this case 

[PATCH] OpenMP, libgomp: Environment variable syntax extension.

2022-01-18 Thread Marcel Vollweiler

Hi,

This patch considers the environment variable syntax extension for
device-specific variants of environment variables from OpenMP 5.1 (see
OpenMP 5.1 specification, p. 75 and p. 639). An environment variable
(e.g. OMP_NUM_TEAMS) can have different suffixes:

_DEV (e.g. OMP_NUM_TEAMS_DEV): affects all devices but not the host.
_DEV_ (e.g. OMP_NUM_TEAMS_DEV_42): affects only device with
number .
no suffix (e.g. OMP_NUM_TEAMS): affects only the host.

In future OpenMP versions also suffix _ALL will be introduced (see
discussion https://github.com/OpenMP/spec/issues/3179). This is also
considered in this patch:

_ALL (e.g. OMP_NUM_TEAMS_ALL): affects all devices and the host.

The precedence is as follows (descending). For the host:

1. no suffix
2. _ALL

For devices:

1. _DEV_
2. _DEV
3. _ALL

That means, _DEV_ is used whenever available. Otherwise _DEV is
used if available, and at last _ALL. If there is no value for any of the
variable variants, default values are used as already implemented before.

This patch concerns parsing (a), storing (b), output (c) and
transmission to the device (d):

(a) The actual number of devices and the numbering are not known when
parsing the environment variables. Thus all environment variables are
iterated and searched for device-specific ones.

(b) Only configured device-specific variables are stored. Thus, linked
lists are used.

(c) The output is done in omp_display_env (see specification p. 468f).
Global ICVs are tagged with [all], see
https://github.com/OpenMP/spec/issues/3179. ICVs which are not global
but aren't handled device-specific yet are tagged with [host].
omp_display_env outputs the initial values of the ICVs. That's why
separate data structures are introduced (like gomp_initial_icv...).

(d) Device-specific ICVs which are already user accessible on the device
are transmitted to the device (moreover nteams-var is added and used for
the tests). There are ICVs which values are currently set explicitly in
the config when copying them to the device: GOMP_NTHREADS_VAR,
GOMP_THREAD_LIMIT_VAR, GOMP_DYN_VAR (see gomp_gcn_enter_kernel in
libgomp/config/gcn/team.c and gomp_nvptx_main in
libgomp/config/nvptx/team.c). The corresponding environment variables
are nevertheless parsed and stored device-specific but the transmission
to the device is not changed.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Environment variable syntax extension.

This patch considers the environment variable syntax extension for
device-specific variants of environment variables from OpenMP 5.1 (see
OpenMP 5.1 specification, p. 75 and p. 639). An environment variable (e.g.
OMP_NUM_TEAMS) can have different suffixes:

_DEV (e.g. OMP_NUM_TEAMS_DEV): affects all devices but not the host.
_DEV_ (e.g. OMP_NUM_TEAMS_DEV_42): affects only device with
number .
no suffix (e.g. OMP_NUM_TEAMS): affects only the host.

In future OpenMP versions also suffix _ALL will be introduced (see discussion
https://github.com/OpenMP/spec/issues/3179). This is also considered in this
patch:

_ALL (e.g. OMP_NUM_TEAMS_ALL): affects all devices and the host.

The precedence is as follows (descending). For the host:

1. no suffix
2. _ALL

For devices:

1. _DEV_
2. _DEV
3. _ALL

That means, _DEV_ is used whenever available. Otherwise _DEV is used if
available, and at last _ALL. If there is no value for any of the variable
variants, default values are used as already implemented before.

This patch concerns parsing (a), storing (b), output (c) and transmission to 
the device (d):

(a) The actual number of devices and the numbering are not known when parsing
the environment variables. Thus all environment variables are iterated and
searched for device-specific ones.
(b) Only configured device-specific variables are stored. Thus, linked lists
are used.
(c) The output is done in omp_display_env (see specification p. 468f). Global
ICVs are tagged with [all], see https://github.com/OpenMP/spec/issues/3179.
ICVs which are not global but aren't handled device-specific yet are tagged
with [host]. omp_display_env outputs the initial values of the ICVs. That's why
separate data structures are introduced (like gomp_initial_icv...).
(d) Device-specific ICVs which are already user accessible on the device are
transmitted to the device (moreover nteams-var is added and used for the tests).
There are ICVs which values are currently set explicitly in the config when
copying them to the device: GOMP_NTHREADS_VAR, GOMP_THREAD_LIMIT_VAR,
GOMP_DYN_VAR (see gomp_gcn_enter_kernel in libgomp/config/gcn/team.c and
gomp_nvptx_main in libgomp/config/nvptx/team.c). The corresponding environment
variables are nevertheless 

OpenMP, libgomp: Environment variable syntax extension.

2022-01-18 Thread Marcel Vollweiler

Hi,

This patch considers the environment variable syntax extension for
device-specific variants of environment variables from OpenMP 5.1 (see
OpenMP 5.1 specification, p. 75 and p. 639). An environment variable
(e.g. OMP_NUM_TEAMS) can have different suffixes:

_DEV (e.g. OMP_NUM_TEAMS_DEV): affects all devices but not the host.
_DEV_ (e.g. OMP_NUM_TEAMS_DEV_42): affects only device with
number .
no suffix (e.g. OMP_NUM_TEAMS): affects only the host.

In future OpenMP versions also suffix _ALL will be introduced (see
discussion https://github.com/OpenMP/spec/issues/3179). This is also
considered in this patch:

_ALL (e.g. OMP_NUM_TEAMS_ALL): affects all devices and the host.

The precedence is as follows (descending). For the host:

   1. no suffix
   2. _ALL

For devices:

   1. _DEV_
   2. _DEV
   3. _ALL

That means, _DEV_ is used whenever available. Otherwise _DEV is
used if available, and at last _ALL. If there is no value for any of the
variable variants, default values are used as already implemented before.

This patch concerns parsing (a), storing (b), output (c) and
transmission to the device (d):

(a) The actual number of devices and the numbering are not known when
parsing the environment variables. Thus all environment variables are
iterated and searched for device-specific ones.

(b) Only configured device-specific variables are stored. Thus, linked
lists are used.

(c) The output is done in omp_display_env (see specification p. 468f).
Global ICVs are tagged with [all], see
https://github.com/OpenMP/spec/issues/3179. ICVs which are not global
but aren't handled device-specific yet are tagged with [host].
omp_display_env outputs the initial values of the ICVs. That's why
separate data structures are introduced (like gomp_initial_icv...).

(d) Device-specific ICVs which are already user accessible on the device
are transmitted to the device (moreover nteams-var is added and used for
the tests). There are ICVs which values are currently set explicitly in
the config when copying them to the device: GOMP_NTHREADS_VAR,
GOMP_THREAD_LIMIT_VAR, GOMP_DYN_VAR (see gomp_gcn_enter_kernel in
libgomp/config/gcn/team.c and gomp_nvptx_main in
libgomp/config/nvptx/team.c). The corresponding environment variables
are nevertheless parsed and stored device-specific but the transmission
to the device is not changed.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
OpenMP, libgomp: Environment variable syntax extension.

This patch considers the environment variable syntax extension for
device-specific variants of environment variables from OpenMP 5.1 (see
OpenMP 5.1 specification, p. 75 and p. 639). An environment variable (e.g.
OMP_NUM_TEAMS) can have different suffixes:

_DEV (e.g. OMP_NUM_TEAMS_DEV): affects all devices but not the host.
_DEV_ (e.g. OMP_NUM_TEAMS_DEV_42): affects only device with
number .
no suffix (e.g. OMP_NUM_TEAMS): affects only the host.

In future OpenMP versions also suffix _ALL will be introduced (see discussion
https://github.com/OpenMP/spec/issues/3179). This is also considered in this
patch:

_ALL (e.g. OMP_NUM_TEAMS_ALL): affects all devices and the host.

The precedence is as follows (descending). For the host:

1. no suffix
2. _ALL

For devices:

1. _DEV_
2. _DEV
3. _ALL

That means, _DEV_ is used whenever available. Otherwise _DEV is used if
available, and at last _ALL. If there is no value for any of the variable
variants, default values are used as already implemented before.

This patch concerns parsing (a), storing (b), output (c) and transmission to 
the device (d):

(a) The actual number of devices and the numbering are not known when parsing
the environment variables. Thus all environment variables are iterated and
searched for device-specific ones.
(b) Only configured device-specific variables are stored. Thus, linked lists
are used.
(c) The output is done in omp_display_env (see specification p. 468f). Global
ICVs are tagged with [all], see https://github.com/OpenMP/spec/issues/3179.
ICVs which are not global but aren't handled device-specific yet are tagged
with [host]. omp_display_env outputs the initial values of the ICVs. That's why
separate data structures are introduced (like gomp_initial_icv...).
(d) Device-specific ICVs which are already user accessible on the device are
transmitted to the device (moreover nteams-var is added and used for the tests).
There are ICVs which values are currently set explicitly in the config when
copying them to the device: GOMP_NTHREADS_VAR, GOMP_THREAD_LIMIT_VAR,
GOMP_DYN_VAR (see gomp_gcn_enter_kernel in libgomp/config/gcn/team.c and
gomp_nvptx_main in libgomp/config/nvptx/team.c). The corresponding environment
variables are 

Re: [PATCH] libgomp, OpenMP: Fix issue for omp_get_device_num on gfx targets.

2022-01-18 Thread Marcel Vollweiler

Hi Thomas,

Am 18.01.2022 um 13:25 schrieb Thomas Schwinge:

Hi!

Maybe I'm just totally confused -- as so often ;-) -- but things seem
strange here:

On 2022-01-12T10:43:05+0100, Marcel Vollweiler  wrote:

Currently omp_get_device_num does not work on gcn targets with more than
one offload device. The reason is that GOMP_DEVICE_NUM_VAR


I understand the 'GOMP_DEVICE_NUM_VAR' "macro indirection" is so that we
define the actual symbol name ('__gomp_device_num') in one place
('libgomp/libgomp-plugin.h'), and then use it (via macro expansion) in
several places, right?


Yes, as far as I understood.




is static in
icv-device.c and thus "__gomp_device_num" is not visible in the offload
image.


That behavior seems correct -- but undesired indeed?


Good question. In contrast to nvptx I observed that __gomp_device_num is
not part of the offload image which we read out in
GOMP_OFFLOAD_load_image ("if (status != HSA_STATUS_SUCCESS)" in
libgomp/plugin/plugin-gcn.c returns false). I validated it with some
additional output in the if-branches.




This patch removes "static" such that "__gomp_device_num" is now part of
the offload image and can now be found in GOMP_OFFLOAD_load_image in the
plugin.


That seems correct?

Or, is there a reason to have it 'static', say, so that several such
local variables can co-exist, instead of just one global one?


This is not an issue for nvptx. There, "__gomp_device_num" is in the
offload image even with "static".


That's unexpected then, and should be looked into?


Actually, I don't see the reason for the different behaviour for nvptx.
I just tested that for nvptx the correct device number is returned by
omp_get_device_num on the device - also if we have more than one device.



Still, should 'static' be removed here, too?


I wouldn't suggest unless it is really necessary? I mean, there we don't
have any issue. Although I aggree with Andrew that we could combine both
icv-device.c files into one common file.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH] libgomp, OpenMP: Fix issue for omp_get_device_num on gfx targets.

2022-01-12 Thread Marcel Vollweiler

Hi,

Currently omp_get_device_num does not work on gcn targets with more than
one offload device. The reason is that GOMP_DEVICE_NUM_VAR is static in
icv-device.c and thus "__gomp_device_num" is not visible in the offload
image.

This patch removes "static" such that "__gomp_device_num" is now part of
the offload image and can now be found in GOMP_OFFLOAD_load_image in the
plugin.

This is not an issue for nvptx. There, "__gomp_device_num" is in the
offload image even with "static".

The patch was tested on x86_64-linux with amdgcn offloading with no
regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp, OpenMP: Fix issue for omp_get_device_num on gcn targets.

Currently omp_get_device_num does not work on gcn targets with more than one
offload device. The reason is that GOMP_DEVICE_NUM_VAR is static in
icv-device.c and thus "__gomp_device_num" is not visible in the offload image.

This patch removes "static" such that "__gomp_device_num" is now part of the
offload image and can now be found in GOMP_OFFLOAD_load_image in the plugin.

This is not an issue for nvptx. There, "__gomp_device_num" is in the offload
image even with "static".

libgomp/ChangeLog:

* config/gcn/icv-device.c: Make GOMP_DEVICE_NUM_VAR public (remove
"static") to make the device num available in the offload image.

diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c
index fcfa0f3..f70b7e6 100644
--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -60,7 +60,7 @@ omp_is_initial_device (void)
 
 /* This is set to the device number of current GPU during device 
initialization,
when the offload image containing this libgomp portion is loaded.  */
-static volatile int GOMP_DEVICE_NUM_VAR;
+volatile int GOMP_DEVICE_NUM_VAR;
 
 int
 omp_get_device_num (void)


[PING] [PATCH] C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct

2022-01-10 Thread Marcel Vollweiler

Hi,

I'd like to ping the patch for the OpenMP 'has_device_addr' clause on
the target construct:

https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585361.html


Thanks
Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct

2021-11-24 Thread Marcel Vollweiler

Hi Jakub,

this is again a new version of the 'has_device_addr' patch. It includes
further minor changes in the C/C++ part and in addition the Fortran
implementation.

Tested on x86_64-linux with nvptx offloading with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to 'target' construct.

This patch adds the 'has_device_addr' clause to the OpenMP 'target' construct
which was introduced in OpenMP 5.1 (OpenMP API 5.1 specification pp. 197ff):

has_device_addr(list)

"The has_device_addr clause indicates that its list items already have device
addresses and therefore they may be directly accessed from a target device.
If the device address of a list item is not for the device on which the target
region executes, accessing the list item inside the region results in
unspecified behavior. The list items may include array sections." (p. 200)

"A list item may not be specified in both an is_device_ptr clause and a
has_device_addr clause on the directive." (p. 202)

"A list item that appears in an is_device_ptr or a has_device_addr clause must
not be specified in any data-sharing attribute clause on the same target
construct." (p. 203)

gcc/c-family/ChangeLog:

* c-omp.c (c_omp_split_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR case.
* c-pragma.h (enum pragma_kind): Add 5.1 in comment.
(enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR.

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_name): Parse 'has_device_addr' clause.
(c_parser_omp_variable_list): Handle array sections.
(c_parser_omp_clause_has_device_addr): Added.
(c_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(c_parser_omp_target_exit_data): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* c-typeck.c (handle_omp_array_sections): Handle clause restrictions.
(c_finish_omp_clauses): Handle array sections.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_name): Parse 'has_device_addr' clause.
(cp_parser_omp_var_list_no_open): Handle array sections.
(cp_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(cp_parser_omp_target_update): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* pt.c (tsubst_omp_clauses): Add cases for OMP_CLAUSE_HAS_DEVICE_ADDR.
* semantics.c (handle_omp_array_sections): Handle clause restrictions.
(finish_omp_clauses): Handle array sections.

gcc/fortran/ChangeLog:

* dump-parse-tree.c (show_omp_clauses): Added OMP_LIST_HAS_DEVICE_ADDR
case.
* gfortran.h: Added OMP_LIST_HAS_DEVICE_ADDR.
* openmp.c (enum omp_mask1): Added OMP_CLAUSE_HAS_DEVICE_ADDR.
(gfc_match_omp_clauses): Parse HAS_DEVICE_ADDR clause.
(resolve_omp_clauses): Same.
* trans-openmp.c (gfc_trans_omp_variable_list): Added 
OMP_LIST_HAS_DEVICE_ADDR case.
(gfc_trans_omp_clauses): Firstprivatize of array descriptors.

gcc/ChangeLog:

* gimplify.c (gimplify_scan_omp_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR
cases
and handle array sections.
(gimplify_adjust_omp_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR case.
* omp-low.c (scan_sharing_clauses): Handle OMP_CLAUSE_HAS_DEVICE_ADDR.
(lower_omp_target): Same.
* tree-core.h (enum omp_clause_code): Same.
* tree-nested.c (convert_nonlocal_omp_clauses): Same.
(convert_local_omp_clauses): Same.
* tree-pretty-print.c (dump_omp_clause): Same.
* tree.c: Same.

libgomp/ChangeLog:

* libgomp.texi: Updated entry for HAS_DEVICE_ADDR.
* target.c (copy_firstprivate_data): Copy only if host address is not
NULL.
* testsuite/libgomp.c++/target-has-device-addr-2.C: New test.
* testsuite/libgomp.c++/target-has-device-addr-4.C: New test.
* testsuite/libgomp.c-c++-common/target-has-device-addr-1.c: New test.
* testsuite/libgomp.c/target-has-device-addr-3.c: New test.
* testsuite/libgomp.fortran/target-has-device-addr-1.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-2.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-3.f90: New test.
* testsuite/libgomp.fortran/target-has-device-addr-4.f90: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/clauses-1.c: Added has_device_addr to test cases.
* g++.dg/gomp/attrs-1.C: Added has_device_addr to test cases.
* g++.dg/gomp/attrs-2.C: Added has_device_addr to test cases.
* c-c++-common/gomp/target-has-device-addr-1.c: New test.
* c-c++-common/gomp/target-has-device-addr-2.c: New test.
* 

Re: [PATCH] C, C++, OpenMP: Add 'has_device_addr' clause to 'target' construct

2021-11-15 Thread Marcel Vollweiler

Hi Jakub,

Am 20.10.2021 um 14:38 schrieb Jakub Jelinek:

On Mon, Oct 18, 2021 at 06:17:20PM +0200, Marcel Vollweiler wrote:

@@ -14255,6 +14257,16 @@ c_parser_omp_clause_use_device_addr (c_parser *parser, 
tree list)
list);
  }

+/* OpenMP 5.1:
+   has_device_addr ( variable-list ) */
+
+static tree
+c_parser_omp_clause_has_device_addr (c_parser *parser, tree list)
+{
+  return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_HAS_DEVICE_ADDR,
+   list);
+}
+
  /* OpenMP 4.5:
 is_device_ptr ( variable-list ) */

@@ -16945,6 +16957,10 @@ c_parser_omp_all_clauses (c_parser *parser, 
omp_clause_mask mask,
   clauses = c_parser_omp_clause_use_device_addr (parser, clauses);
   c_name = "use_device_addr";
   break;
+case PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR:
+  clauses = c_parser_omp_clause_has_device_addr (parser, clauses);
+  c_name = "has_device_addr";
+  break;
 case PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR:
   clauses = c_parser_omp_clause_is_device_ptr (parser, clauses);
   c_name = "is_device_ptr";
@@ -20926,7 +20942,8 @@ c_parser_omp_target_exit_data (location_t loc, c_parser 
*parser,
 | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
 | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP)   \
 | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
-| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
+| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)\
+| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR))

  static bool
  c_parser_omp_target (c_parser *parser, enum pragma_context context, bool 
*if_p)


OpenMP 5.1 in [200:6-9] says:
The has_device_addr clause indicates ... The list items may include array 
sections.

This means in addition to the c-parser.c and parser.c changes you've done,
at least c_parser_omp_variable_list needs to change to include
OMP_CLAUSE_HAS_DEVICE_ADDR among
 case OMP_CLAUSE_AFFINITY:
 case OMP_CLAUSE_DEPEND:
 case OMP_CLAUSE_REDUCTION:
 case OMP_CLAUSE_IN_REDUCTION:
 case OMP_CLAUSE_TASK_REDUCTION:
clauses (similarly for C++) and then {,c_}finish_omp_clauses needs to handle
it similarly to other clauses that can have array sections.
As it is a data sharing clause, I think the closest model (e.g. for
handle_omp_array_sections* purposes) is OMP_CLAUSE_*REDUCTION.
Then even the case when OMP_CLAUSE_DECL of the clause needs handling
similarly to other clauses that accept array sections.



The handling for array sections is added now. The basic idea of the
implementation is that it seems to be sufficient to consider the base
variable. I'm not completely sure but I think access to memory which is
not specified in has_device_addr cannot be prevented at all and my
reading of the OpenMP 5.1 specification is that the behavour is
undefined for access to memory that is not specified in has_device_addr.
Thus, limitation of an array to some section does not prevent for using
parts of the array outside the specified array section.

Moreover, cases like

  #pragma omp target data map(x[2:3]) use_device_addr(x)
#pragma omp target has_device_addr(x[2:3])

or

  #pragma omp target data map(x[2:3]) use_device_addr(x[2:3])
#pragma omp target has_device_addr(x[2:3])

do not work yet, since the use_device_addr clause does currently not
support array sections.


diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 0aac978..d677592 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -14054,7 +14054,7 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
  {
bitmap_head generic_head, firstprivate_head, lastprivate_head;
bitmap_head aligned_head, map_head, map_field_head, map_firstprivate_head;
-  bitmap_head oacc_reduction_head;
+  bitmap_head oacc_reduction_head, has_device_addr_head, is_device_ptr_head;


I'd prefer not to add new bitmaps unless necessary, can't the clause use the
same bitmap together with is_device_ptr clause?  One can't specify something
both as is_device_ptr and has_device_addr at the same time...



Both bitmaps are now combined to one. I previously seperated the bitmaps
in order to have a clearer naming. Now I called it 'is_on_device' to be
more general than with is_device_ptr or has_device_addr. However, other
suggestions are welcome :)


--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -36145,7 +36145,9 @@ cp_parser_omp_clause_name (cp_parser *parser)
 result = PRAGMA_OMP_CLAUSE_GRAINSIZE;
   break;
 case 'h':
-  if (!strcmp ("hint", p))
+  if (!strcmp ("has_device_addr", p))
+result = PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR;
+  else if (!strcmp ("hint", p))
 result = PRAGMA_OMP_CLAUSE_HINT;
   else if (!strcmp ("host", p))
 result = PRAGMA_OACC

[PATCH] C, C++, OpenMP: Add 'has_device_addr' clause to 'target' construct

2021-10-18 Thread Marcel Vollweiler

Hi,

This patch adds the 'has_device_addr' clause to the OpenMP 'target'
construct which was introduced in OpenMP 5.1:

"The has_device_addr clause was added to the target construct to allow
access to variables or array sections that already have a device
address" (OpenMP 5.1 Specification, p. 669)

"The has_device_addr clause indicates that its list items already have
device addresses and therefore they may be directly accessed from a
target device. If the device address of a list item is not for the
device on which the target region executes, accessing the list item
inside the region results in unspecified behavior. The list items may
include array sections." (OpenMP 5.1 Specification, p. 200)

There are some restrictions for 'has_device_addr' (p. 202f):

1. "A list item may not be specified in both an is_device_ptr clause and
a has_device_addr clause on the directive."

2. "A list item that appears in an is_device_ptr or a has_device_addr
clause must not be specified in any data-sharing attribute clause on the
same target construct."

3. "A list item that appears in a has_device_addr clause must have a
valid device address for the device data environment."

4. As discussed on the omp-lang mailing list
(https://mailman.openmp.org/mailman/private/omp-lang/2021/017982.html),
has_device_addr is a data-sharing attribute clause (that is not yet
stated explicitly but will be corrected in OpenMP 5.2) and should not be
used together with the map clause on the same construct.

Similar restrictions hold also for the 'is_device_ptr' clause, so I
updated the code and added tests for that clause, too.

I tested the patch without regressions on powerpc64le-linux-gnu with
nvptx offloading and x86_64-linux-gnu with amdgcn offloading.

This patch only considers C/C++. The changes for Fortran will be
submitted separately later.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
C, C++, OpenMP: Add 'has_device_addr' clause to 'target' construct.

This patch adds the 'has_device_addr' clause to the OpenMP 'target' construct
which was introduced in OpenMP 5.1.

gcc/c-family/ChangeLog:

* c-omp.c (c_omp_split_clauses): Add OMP_CLAUSE_HAS_DEVICE_ADDR case.
* c-pragma.h (enum pragma_kind): Add 5.1 in comment.
(enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR.

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_name): Parse 'has_device_addr' clause.
(c_parser_omp_clause_has_device_addr): Added.
(c_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(c_parser_omp_target_exit_data): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* c-typeck.c (c_finish_omp_clauses): Add check that has_device_addr and 
is_device_ptr do not appear together with map.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_name): Parse 'has_device_addr' clause.
(cp_parser_omp_all_clauses): Add PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR case.
(cp_parser_omp_target_update): Add HAS_DEVICE_ADDR to OMP_CLAUSE_MASK.
* pt.c (tsubst_omp_clauses): Add cases for OMP_CLAUSE_HAS_DEVICE_ADDR.
* semantics.c (finish_omp_clauses): Add check that has_device_addr and
is_device_ptr do not appear together with map.

gcc/ChangeLog:

* gimplify.c (gimplify_scan_omp_clauses): Add 
OMP_CLAUSE_HAS_DEVICE_ADDR case.
(gimplify_adjust_omp_clauses): Likewise.
* omp-low.c (scan_sharing_clauses): Add lowering for has_device_addr 
clause.
(lower_omp_target): Likewise.
* tree-core.h (enum omp_clause_code): Update enum.
* tree-nested.c (convert_nonlocal_omp_clauses): Add has_device_addr 
support.
(convert_local_omp_clauses): Likewise.
* tree-pretty-print.c (dump_omp_clause): Likewise.
* tree.c: Update omp_clause_num_ops array.

libgomp/ChangeLog:

* libgomp.texi: Updated entry for 'has-device-addr'.
* testsuite/libgomp.c++/target-has-device-addr-2.C: New test.
* testsuite/libgomp.c-c++-common/target-has-device-addr-1.c: New test.
* testsuite/libgomp.c-c++-common/target-has-device-addr-3.c: New test.
* testsuite/libgomp.c/target-has-device-addr-4.c: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/clauses-1.c: Add has_device_addr to test cases.
* g++.dg/gomp/attrs-1.C: Likewise.
* g++.dg/gomp/attrs-2.C: Likewise.
* c-c++-common/gomp/target-has-device-addr-1.c: New test.
* c-c++-common/gomp/target-is-device-ptr.c: New test.

diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index b9024cb..eb4950c 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -1837,6 +1837,7 @@ c_omp_split_clauses (location_t loc, enum tree_code code,
case OMP_CLAUSE_DEVICE:

Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Marcel Vollweiler

Hi Jakub,

Am 11.10.2021 um 11:49 schrieb Jakub Jelinek:

On Mon, Oct 11, 2021 at 11:40:54AM +0200, Marcel Vollweiler wrote:

libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

 * allocator.c (omp_init_allocator): Replace omp_atv_sequential with
 omp_atv_serialized.
 * omp.h.in: Add deprecated flag for omp_atv_sequential.
 * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
 * testsuite/libgomp.c-c++-common/alloc-10.c: New test.
 * testsuite/libgomp.fortran/alloc-12.f90: New test.


LGTM, except one nit.


--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
+}
\ No newline at end of file


Please make sure the file ends with a newline before committing.


Changed :)



  Jakub



Thanks,
Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

* allocator.c (omp_init_allocator): Replace omp_atv_sequential with
omp_atv_serialized.
* omp.h.in: Add deprecated flag for omp_atv_sequential.
* omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
* testsuite/libgomp.c-c++-common/alloc-10.c: New test.
* testsuite/libgomp.fortran/alloc-12.f90: New test.

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index dce600f..deebb6a 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int 
ntraits,
break;
  case omp_atv_contended:
  case omp_atv_uncontended:
- case omp_atv_sequential:
+ case omp_atv_serialized:
  case omp_atv_private:
data.sync_hint = traits[i].value;
break;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index d75ee13..e57e192 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t
   omp_atv_contended = 3,
   omp_atv_uncontended = 4,
   omp_atv_serialized = 5,
-  omp_atv_sequential = omp_atv_serialized,
+  omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized,
   omp_atv_private = 6,
   omp_atv_all = 7,
   omp_atv_thread = 8,
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 1063eee..57766b5 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -810,7 +810,7 @@
 #endif
 
 #if _OPENMP >= 202011
-!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master
+!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential
 #endif
 
   end module omp_lib
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c 
b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
new file mode 100644
index 000..01ae150d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
@@ -0,0 +1,25 @@
+#include 
+#include 
+#include 
+
+const omp_alloctrait_t traits[]
+= { { omp_atk_alignment, 64 },
+{ omp_atk_sync_hint, omp_atv_serialized },
+{ omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main ()
+{
+  omp_allocator_handle_t a;
+  int *volatile p;
+  a = omp_init_allocator (omp_default_mem_space, 3, traits);
+  if (a == omp_null_allocator)
+abort ();
+  p = (int *) omp_alloc (3072, a);
+  if uintptr_t) p) % 64) != 0)
+abort ();
+  p[0] = 1;
+  p[3071 / sizeof (int)] = 2;
+  omp_free (p, a);
+  omp_destroy_allocator (a);
+}
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 
b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
new file mode 100644
index 000..3d10959
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
@@ -0,0 +1,28 @@
+! { dg-additional-options "-Wall -Wextra" }
+program main
+  use omp_lib
+  use ISO_C_Binding
+  implicit none (external, type)
+  type(c_ptr) :: p
+  integer, pointer, contiguous :: ip(:)
+  type (omp_alloctrait) :: traits(3)
+  integer (omp_allocator_handle_kind) :: a
+  integer (c_ptrdiff_t) :: iptr
+
+  traits = [omp_alloctrait (omp_

[Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Marcel Vollweiler

Hi,

The variable omp_atv_sequential was replaced by omp_atv_serialized in
OpenMP 5.1. This was already implemented by Jakub (C/C++, commit
ea82325afec) and Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available
(one test for C/C++ and one for Fortran). Besides that
omp_atv_sequential is marked as deprecated in C/C++ and Fortran for
OpenMP 5.1.

The patch was tested on x86_64-linux and powerpc64le-linux with nvptx
offloading and on x86_64-linux with amdgcn offloading with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

* allocator.c (omp_init_allocator): Replace omp_atv_sequential with
omp_atv_serialized.
* omp.h.in: Add deprecated flag for omp_atv_sequential.
* omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
* testsuite/libgomp.c-c++-common/alloc-10.c: New test.
* testsuite/libgomp.fortran/alloc-12.f90: New test.

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index dce600f..deebb6a 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int 
ntraits,
break;
  case omp_atv_contended:
  case omp_atv_uncontended:
- case omp_atv_sequential:
+ case omp_atv_serialized:
  case omp_atv_private:
data.sync_hint = traits[i].value;
break;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index d75ee13..e57e192 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t
   omp_atv_contended = 3,
   omp_atv_uncontended = 4,
   omp_atv_serialized = 5,
-  omp_atv_sequential = omp_atv_serialized,
+  omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized,
   omp_atv_private = 6,
   omp_atv_all = 7,
   omp_atv_thread = 8,
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 1063eee..57766b5 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -810,7 +810,7 @@
 #endif
 
 #if _OPENMP >= 202011
-!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master
+!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential
 #endif
 
   end module omp_lib
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c 
b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
new file mode 100644
index 000..742c64a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
@@ -0,0 +1,25 @@
+#include 
+#include 
+#include 
+
+const omp_alloctrait_t traits[]
+= { { omp_atk_alignment, 64 },
+{ omp_atk_sync_hint, omp_atv_serialized },
+{ omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main ()
+{
+  omp_allocator_handle_t a;
+  int *volatile p;
+  a = omp_init_allocator (omp_default_mem_space, 3, traits);
+  if (a == omp_null_allocator)
+abort ();
+  p = (int *) omp_alloc (3072, a);
+  if uintptr_t) p) % 64) != 0)
+abort ();
+  p[0] = 1;
+  p[3071 / sizeof (int)] = 2;
+  omp_free (p, a);
+  omp_destroy_allocator (a);
+}
\ No newline at end of file
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 
b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
new file mode 100644
index 000..3d10959
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
@@ -0,0 +1,28 @@
+! { dg-additional-options "-Wall -Wextra" }
+program main
+  use omp_lib
+  use ISO_C_Binding
+  implicit none (external, type)
+  type(c_ptr) :: p
+  integer, pointer, contiguous :: ip(:)
+  type (omp_alloctrait) :: traits(3)
+  integer (omp_allocator_handle_kind) :: a
+  integer (c_ptrdiff_t) :: iptr
+
+  traits = [omp_alloctrait (omp_atk_alignment, 64), &
+omp_alloctrait (omp_atk_fallback, omp_atv_null_fb), &
+omp_alloctrait (omp_atk_sync_hint, omp_atv_serialized)]
+  a = omp_init_allocator (omp_default_mem_space, 3, traits)
+  if (a == omp_null_allocator) stop 1
+
+  p = omp_alloc (3 * c_sizeof (0), a)
+  if (.not. c_associated (p)) stop 2
+  call c_f_pointer (p, ip, [3])
+  if (mod (TRANSFER (p, iptr), 64) /= 0) &
+stop 3
+  ip(1) = 1
+  ip(2) = 2
+  ip(3) = 3
+  call omp_free (p, a)
+  call omp_destroy_allocator (a)
+end program main


[Patch] C, C++, Fortran, OpenMP: Add support for 'flush seq_cst' construct

2021-09-06 Thread Marcel Vollweiler

Hi,

this patch adds support for the 'seq_cst' memory order clause on the
'flush' directive which was introduced in OpenMP 5.1 (p.275ff of the
OpenMP 5.1 Specification):

"If neither memory-order-clause nor a list appears on the flush
construct then the behavior is as if memory-order-clause is seq_cst.

A flush construct with the seq_cst clause, executed on a given thread,
operates as if all data storage blocks that are accessible to the thread
are flushed by a strong flush operation.

...

An implementation may implement a flush construct with a list by
ignoring the list and treating it the same as a flush construct with the
seq_cst clause."

I am not completely sure about the correct memory model specification:
"MEMMODEL_SYNC_SEQ_CST" vs. "MEMMODEL_SEQ_CST".
As "MEMMODEL_SYNC_SEQ_CST" is already used for flush without a clause
(that should behave in the same way than using seq_cst), see
expand_builtin_sync_synchronize in gcc/builtins.c, and regarding the
discussion in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65697 I found
it appropriate to use "MEMMODEL_SYNC_SEQ_CST" in order to guarantee a
strong flush.

I tested on x86_64-linux with nvptx offloading with no regressions.

Marcel



-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
C, C++, Fortran, OpenMP: Add support for 'flush seq_cst' construct.

This patch adds support for the 'seq_cst' memory order clause on the 'flush'
directive which was introduced in OpenMP 5.1.

gcc/c-family/ChangeLog:

* c-omp.c (c_finish_omp_flush): Handle MEMMODEL_SEQ_CST.

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_flush): Parse 'seq_cst' clause on 'flush' 
directive.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_flush): Parse 'seq_cst' clause on 'flush'
directive.
* semantics.c (finish_omp_flush): Handle MEMMODEL_SEQ_CST.

gcc/fortran/ChangeLog:

* openmp.c (gfc_match_omp_flush): Parse 'seq_cst' clause on 'flush'
directive.
* trans-openmp.c (gfc_trans_omp_flush): Handle OMP_MEMORDER_SEQ_CST.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/flush-1.c: Add test case for 'seq_cst'.
* c-c++-common/gomp/flush-2.c: Add test case for 'seq_cst'.
* g++.dg/gomp/attrs-1.C:  Adapt test to handle all flush clauses.
* gfortran.dg/gomp/flush-1.f90:  Add test case for 'seq_cst'.
* gfortran.dg/gomp/flush-2.f90:  Add test case for 'seq_cst'.

diff --git a/gcc/c-family/c-omp.c b/gcc/c-family/c-omp.c
index 18de7e4..4b95fc1 100644
--- a/gcc/c-family/c-omp.c
+++ b/gcc/c-family/c-omp.c
@@ -606,7 +606,7 @@ c_finish_omp_flush (location_t loc, int mo)
 {
   tree x;
 
-  if (mo == MEMMODEL_LAST)
+  if (mo == MEMMODEL_LAST || mo == MEMMODEL_SEQ_CST)
 {
   x = builtin_decl_explicit (BUILT_IN_SYNC_SYNCHRONIZE);
   x = build_call_expr_loc (loc, x, 0);
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 3b1d10f..4d074ec 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -18339,7 +18339,9 @@ c_parser_omp_flush (c_parser *parser)
   const char *p
= IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
 
-  if (!strcmp (p, "acq_rel"))
+  if (!strcmp (p, "seq_cst"))
+   mo = MEMMODEL_SEQ_CST;
+  else if (!strcmp (p, "acq_rel"))
mo = MEMMODEL_ACQ_REL;
   else if (!strcmp (p, "release"))
mo = MEMMODEL_RELEASE;
@@ -18347,7 +18349,8 @@ c_parser_omp_flush (c_parser *parser)
mo = MEMMODEL_ACQUIRE;
   else
error_at (c_parser_peek_token (parser)->location,
- "expected %, % or %");
+ "expected %, %, % or "
+ "%");
   c_parser_consume_token (parser);
 }
   if (c_parser_next_token_is (parser, CPP_OPEN_PAREN))
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index ea71f9c..f9c2c8a 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -40742,7 +40742,9 @@ cp_parser_omp_flush (cp_parser *parser, cp_token 
*pragma_tok)
 {
   tree id = cp_lexer_peek_token (parser->lexer)->u.value;
   const char *p = IDENTIFIER_POINTER (id);
-  if (!strcmp (p, "acq_rel"))
+  if (!strcmp (p, "seq_cst"))
+   mo = MEMMODEL_SEQ_CST;
+  else if (!strcmp (p, "acq_rel"))
mo = MEMMODEL_ACQ_REL;
   else if (!strcmp (p, "release"))
mo = MEMMODEL_RELEASE;
@@ -40750,7 +40752,8 @@ cp_parser_omp_flush (cp_parser *parser, cp_token 
*pragma_tok)
mo = MEMMODEL_ACQUIRE;
   else
error_at (cp_lexer_peek_token (parser->lexer)->location,
- "expected %, % or %");
+ "expected %, %, % or "
+ "%");
   cp_lexer_consume_token (parser->lexer);
 }
   if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 

Re: [Patch v2] C, C++, Fortran, OpenMP: Add support for device-modifiers for 'omp target device'

2021-09-02 Thread Marcel Vollweiler



Am 01.09.2021 um 11:02 schrieb Jakub Jelinek:

On Wed, Sep 01, 2021 at 09:06:31AM +0200, Christophe Lyon wrote:

   * gfortran.dg/gomp/target-device-ancestor-4.f90: New test.




The last new test fails on aarch64:
  /gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90:7:15: Error:
Sorry, 'reverse_offload' clause at (1) on REQUIRES directive is not yet
supported
compiler exited with status 1
PASS: gfortran.dg/gomp/target-device-ancestor-4.f90   -O   (test for
errors, line 7)
XFAIL: gfortran.dg/gomp/target-device-ancestor-4.f90   -O  sorry,
unimplemented: 'ancestor' not yet supported (test for warnings, line 9)
PASS: gfortran.dg/gomp/target-device-ancestor-4.f90   -O  (test for excess
errors)
gfortran.dg/gomp/target-device-ancestor-4.f90   -O  : dump file does not
exist
UNRESOLVED: gfortran.dg/gomp/target-device-ancestor-4.f90   -O
scan-tree-dump original "pragma omp target [^\n\r)]*device\\(ancestor:1\\)"


It is UNRESOLVED everywhere.  Unlike the C/C++ FEs that emit the original
dump even if there are errors/sorry during parsing, the Fortran FE doesn't
do that.
So I think either the dg-final should be xfailed or removed for now.


To xfail dg-final does not seem to work with a missing dump (it results
in UNRESOLVED as before). Instead I commented out dg-final with "TODO"
similar to other tests and hope that this is ok?



  Jakub



Marcel

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/target-device-ancestor-4.f90: Comment out dg-final 
to avoid
 UNRESOLVED.

diff --git a/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90 
b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90
index 540b3d0..63872fa 100644
--- a/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90
@@ -11,4 +11,4 @@
 
 end
 
-! { dg-final { scan-tree-dump "pragma omp target 
\[^\n\r)]*device\\(ancestor:1\\)" "original" } }
+! TODO: dg-final { scan-tree-dump-times "pragma omp target 
\[^\n\r)]*device\\(ancestor:1\\)" 1 "original" } }


Re: [Patch v2] C, C++, Fortran, OpenMP: Add support for device-modifiers for 'omp target device'

2021-08-25 Thread Marcel Vollweiler

Hi Jakub,

I applied all your suggested changes and checked for no test regressions
on x86_64-linux with nvptx offloading. The revised patch is attached.

Do you think that it's ok to commit the code?

Thanks,

Marcel

Am 23.08.2021 um 19:47 schrieb Jakub Jelinek:

On Fri, Aug 20, 2021 at 09:18:32PM +0200, Marcel Vollweiler wrote:


--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -15864,37 +15864,81 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
  }

  /* OpenMP 4.0:
-   device ( expression ) */
+>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
   device ( expression )


Please remove all the >>>>>s.

+
+   OpenMP 5.0:
+   device ( [device-modifier :] integer-expression )
+
+   device-modifier:
+ ancestor | device_num */




+  /* A requires directive with the reverse_offload clause must be
+  specified.  */
+  if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0)
+{
+  c_parser_error (parser, "a % directive with the "
+  "% clause must be "
+  "specified");


[BI think this diagnostics is confusing, it tells the user that it has to
do something but doesn't tell why.  It is also not a parser error.
So I think it should be instead
error_at (tok->location, "% device modifier not "
 "preceded by % directive "
 "with % clause");


+  parens.skip_until_found_close (parser);
+  return list;
+}
+  ancestor = true;
+}



+  if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+{
+  c_parser_error (parser, "expected integer expression");
+  return list;
  }

+  check_no_duplicate_clause (list, OMP_CLAUSE_DEVICE, "device");
+
+  c = build_omp_clause (clause_loc, OMP_CLAUSE_DEVICE);
+
+  OMP_CLAUSE_DEVICE_ID (c) = t;
+  OMP_CLAUSE_CHAIN (c) = list;
+  OMP_CLAUSE_DEVICE_ANCESTOR (c) = ancestor;
+
+  list = c;
return list;
  }

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 5349ef1..b4d8d81 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -15139,6 +15139,22 @@ c_finish_omp_clauses (tree clauses, enum 
c_omp_region_type ort)
 case OMP_CLAUSE_COLLAPSE:
 case OMP_CLAUSE_FINAL:
 case OMP_CLAUSE_DEVICE:
+  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE
+  && OMP_CLAUSE_DEVICE_ANCESTOR (c))
+{
+  t = OMP_CLAUSE_DEVICE_ID (c);
+  if (TREE_CODE (t) == INTEGER_CST
+  && wi::to_widest (t) != 1)
+{
+  error_at (OMP_CLAUSE_LOCATION (c),
+"the % clause expression must evaluate to "
+"%<1%>");
+  remove = true;
+  break;
+}
+}
+  /* FALLTHRU */


For the C FE, I'd suggest to move this to the c_parser_omp_clause_device
routine like other similar checking is done there too.  And you can use
if (TREE_CODE (t) == INTEGER_CST && !integer_onep (t))

+  error_at (tok->location, "a % directive with the "



+   "% clause must be "
+   "specified");


See above.


@@ -38562,6 +38601,7 @@ cp_parser_omp_clause_device (cp_parser *parser, tree 
list,
c = build_omp_clause (location, OMP_CLAUSE_DEVICE);
OMP_CLAUSE_DEVICE_ID (c) = t;
OMP_CLAUSE_CHAIN (c) = list;
+  OMP_CLAUSE_DEVICE_ANCESTOR (c) = ancestor;


But in C++ the INTEGER_CST checking shouldn't be done here, because
the argument could be type or value dependent.


--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -7334,6 +7334,15 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
 "% id must be integral");
   remove = true;
 }
+  else if (OMP_CLAUSE_DEVICE_ANCESTOR (c)
+   && TREE_CODE (t) == INTEGER_CST
+   && wi::to_widest (t) != 1)


!integer_onep (t)


+  if (!(gfc_current_ns->omp_requires & OMP_REQ_REVERSE_OFFLOAD))
+{
+  gfc_error ("a % directive with the "
+ "% clause must be "
+ "specified at %C");


See above.


+  else if (gfc_match ("%e )", >device) == MATCH_YES)
+{
+}
+  else


Better != MATCH_YES and drop the {} else ?


+{
+  gfc_error ("Expected integer exp

Re: [Patch v2] C, C++, Fortran, OpenMP: Add support for device-modifiers for 'omp target device'

2021-08-20 Thread Marcel Vollweiler

Hi Jakub,

this is the second version of the patch for the device-modifiers for
'omp target device'.

Am 20.07.2021 um 15:30 schrieb Jakub Jelinek:

On Wed, Jul 07, 2021 at 07:59:58PM +0200, Marcel Vollweiler wrote:

OpenMP: Add support for device-modifiers for 'omp target device'

gcc/c/ChangeLog:

 * c-parser.c (c_parser_omp_clause_device): Add support for
 device-modifiers for 'omp target device'.

gcc/cp/ChangeLog:

 * parser.c (cp_parser_omp_clause_device): Add support for
 device-modifiers for 'omp target device'.

gcc/fortran/ChangeLog:

 * openmp.c (gfc_match_omp_clauses): Add support for
 device-modifiers for 'omp target device'.

gcc/testsuite/ChangeLog:

 * c-c++-common/gomp/target-device-1.c: New test.
 * c-c++-common/gomp/target-device-2.c: New test.
 * gfortran.dg/gomp/target-device-1.f90: New test.
 * gfortran.dg/gomp/target-device-2.f90: New test.



  static tree
  c_parser_omp_clause_device (c_parser *parser, tree list)
  {
location_t clause_loc = c_parser_peek_token (parser)->location;
+  location_t expr_loc;
+  c_expr expr;
+  tree c, t;
+
matching_parens parens;
-  if (parens.require_open (parser))
+  if (!parens.require_open (parser))
+return list;
+
+  int pos = 1;
+  int pos_colon = 0;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME
+ || c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON
+ || c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COMMA)


Why CPP_COMMA?  The OpenMP 5.0/5.1/5.2 grammar only supports a single device
modifier.
So please simplify it to just an
   if (c_parser_next_token_is (parser, CPP_NAME)
   && c_parser_peek_2nd_token (parser, 2)->type == CPP_COLON)
{
and check there just for the two modifiers.
   const char *p
  = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
   if (strcmp ("ancestor", p) == 0)
 ...
   else if (strcmp ("device-num", p) == 0)
  ;
   else
 error_at (..., "expected % or %");
 }
Similarly for C++.


The parser files for C and C++ are simplyfied accordingly.



Also, even if we sorry on device(ancestor: ...), it would be nice if you
in tree.h define OMP_CLAUSE_DEVICE_ANCESTOR macro (with
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE)->base.public_flag)
definition), set it, sorry later on it (e.g. omp-expand.c) only if it
survived till then (wasn't removed because of other errors) and diagnose
the various restrictions/requirements on device(ancestor:).


I changed it as you proposed. I marked the tests for "sorry,
unimplemented: 'ancestor' not yet supported" with xfail because a
previous sorry for "requires reverse_offload" suppresses the message for
'ancestor'. "reverse_offload" is explicitly needed due to the
specificated ancestor restrictions (OpenMP specification p. 175, l. 1).


In particular:
1) that OMP_CLAUSE_DEVICE clauses with OMP_CLAUSE_DEVICE_ANCESTOR
only appear on OMP_TARGET and not on other constructs
(this can be easily tested e.g. during gimplification, when
gimplify_scan_omp_clauses sees OMP_CLAUSE_DEVICE with
OMP_CLAUSE_DEVICE_ANCESTOR and code != OMP_TARGET, diagnose)
2) that if after the usual fully folding the argument is INTEGER_CST,
it is equal to 1 (the spec says must evaluate to 1, but doesn't say
it has to be a constant, so it can evaluate to 1 at runtime but if it is
a constant other than 1, we know it will not evaluate to 1); this can be
done in *finish_omp_clauses
3) that omp_requires_mask has OMP_REQUIRES_REVERSE_OFFLOAD set; this should
be checked during the parsing
4) only the device, firstprivate, private, defaultmap, and map clauses may
appear on the construct; can be also done during gimplification, there is
at most one device clause, so walking all clauses when we see
OMP_CLAUSE_DEVICE_ANCESTOR is still linear complexity
5) no OpenMP constructs or calls to OpenMP API runtime routines are allowed 
inside
the corresponding target region (this is something that should be checked
in omp-low.c region nesting code, we already have similar restrictions
for e.g. the loop construct)
Everything should be covered by testcases.


Tests were added for all cases.



  Jakub



I tested on x86_64-linux with nvptx offloading with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
Add support for device-modifiers for 'omp target device'.

'device_num' and 'ancestor' are now parsed on target device constructs for C,
C++, and Fortran (see OpenMP specification 5.0, p. 170). When 'ancestor' is
 used, then 'sorry, not supported' is output. Moreover, the restrictions for
'ancestor

[Patch] C, C++, Fortran, OpenMP: Add support for device-modifiers for 'omp target device'

2021-07-07 Thread Marcel Vollweiler

This patch adds device-modifiers to the device clause:

   #pragma omp target device ([ device-modifier :] integer-expression)

where device-modifier is either 'ancestor' or 'device_num'.

The 'device_num' case

   #pragma omp target device (device_num : integer-expression)

is treated in the same way as

   #pragma omp target device (integer-expression)

before.

For the 'ancestor' case

   #pragma omp target device (ancestor: integer-expression)

a message 'sorry, not yet implemented' is output.


-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
OpenMP: Add support for device-modifiers for 'omp target device'

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_device): Add support for 
device-modifiers for 'omp target device'.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_device): Add support for 
device-modifiers for 'omp target device'.

gcc/fortran/ChangeLog:

* openmp.c (gfc_match_omp_clauses): Add support for 
device-modifiers for 'omp target device'.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/target-device-1.c: New test.
* c-c++-common/gomp/target-device-2.c: New test.
* gfortran.dg/gomp/target-device-1.f90: New test.
* gfortran.dg/gomp/target-device-2.f90: New test.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 9a56e0c..defc52d 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -15864,37 +15864,117 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
 }
 
 /* OpenMP 4.0:
-   device ( expression ) */
+   device ( expression )
+
+   OpenMP 5.0:
+   device ( [device-modifier :] integer-expression )
+
+   device-modifier:
+ ancestor | device_num */
 
 static tree
 c_parser_omp_clause_device (c_parser *parser, tree list)
 {
   location_t clause_loc = c_parser_peek_token (parser)->location;
+  location_t expr_loc;
+  c_expr expr;
+  tree c, t;
+
   matching_parens parens;
-  if (parens.require_open (parser))
+  if (!parens.require_open (parser))
+return list;
+
+  int pos = 1;
+  int pos_colon = 0;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME
+|| c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON
+|| c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COMMA)
 {
-  location_t expr_loc = c_parser_peek_token (parser)->location;
-  c_expr expr = c_parser_expr_no_commas (parser, NULL);
-  expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
-  tree c, t = expr.value;
-  t = c_fully_fold (t, false, NULL);
+  if (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON)
+   {
+ pos_colon = pos;
+ break;
+   }
+  pos++;
+}
 
-  parens.skip_until_found_close (parser);
+  const char *err_msg;
+  if (pos_colon == 1)
+{
+  err_msg = "expected device-modifier % or %";
+  goto invalid_kind;
+}
 
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+  if (pos_colon > 1)
+{
+  if (c_parser_peek_nth_token_raw (parser, 1)->type == CPP_NAME)
{
- c_parser_error (parser, "expected integer expression");
- return list;
+ c_token *tok = c_parser_peek_token (parser);
+ const char *p = IDENTIFIER_POINTER (tok->value);
+ if (strcmp ("ancestor", p) == 0)
+   {
+ if (pos_colon > 2)
+   {
+ err_msg = "expected only one device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+
+ sorry_at (tok->location, "% not yet supported");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ return list;
+   }
+ else if (strcmp ("device_num", p) == 0)
+   {
+ if (pos_colon > 2)
+   {
+ err_msg = "expected only one device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+ c_parser_consume_token (parser);
+ c_parser_peek_token (parser);
+ c_parser_consume_token (parser);
+   }
+ else
+   {
+ err_msg = "expected device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+   }
+  else
+   {
+ err_msg = "expected device-modifier % or %";
+ goto invalid_kind;
}
+}
 
-  check_no_duplicate_clause (list, OMP_CLAUSE_DEVICE, "device");
+  expr_loc = c_parser_peek_token (parser)->location;
+  expr = c_parser_expr_no_commas (parser, NULL);
+  expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
+  c, t = expr.value;
+  t = c_fully_fold (t, false, NULL);
 
-  c = build_omp_clause (clause_loc, OMP_CLAUSE_DEVICE);
-  OMP_CLAUSE_DEVICE_ID (c) = t;
-  OMP_CLAUSE_CHAIN (c) = list;
-  list = 

Re: [PATCH] gcc/configure.ac: fix register issue for global_load assembler functions

2021-06-17 Thread Marcel Vollweiler

Am 16.06.2021 um 19:19 schrieb Joseph Myers:

On Wed, 16 Jun 2021, Julian Brown wrote:


+if test x$gcc_cv_as_gcn_global_load_fixed = xyes; then
+  AC_DEFINE(HAVE_GCN_ASM_GLOBAL_LOAD_FIXED, 1, [Define if your
assembler has fixed global_load functions.])
+else
+  AC_DEFINE(HAVE_GCN_ASM_GLOBAL_LOAD_FIXED, 0, [Define if your
assembler has fixed global_load functions.])
+fi
+AC_MSG_RESULT($gcc_cv_as_gcn_global_load_fixed)
+;;
+esac


I think the more-common idiom seems to be just having a single
AC_DEFINE if the feature is present -- like (as a random example)
HAVE_AS_IX86_REP_LOCK_PREFIX, which omits the "define ... 0" case you
have here. (You'd use "#ifdef ..." instead of "#if ... == 1" to check
the feature then, of course).


Actually I think what's preferable is the approach used with e.g.
GATHER_STATISTICS - define to 0 or 1 using a single AC_DEFINE_UNQUOTED
call (via a shell variable that's set to 0 or 1 as appropriate), then test
in "if" conditions, not #if, as far as possible, so that both alternatives
in the conditional code always get syntax-checked when compiling GCC (for
this target).



Thank you for your proposals. I adapted configure.ac and gcn.c
accordingly (similar to the GATHER_STATISTICS example).

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
gcc/configure.ac: Adapt configuration according to assembler fix of global_load 
functions.

gcc/ChangeLog:

* config.in: Regenerate.
* config/gcn/gcn.c (print_operand_address): Fix for global_load 
assembler
functions.
* configure: Regenerate.
* configure.ac: Fix for global_load assembler functions. 

diff --git a/gcc/config.in b/gcc/config.in
index e54f59c..18e6271 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -1431,6 +1431,12 @@
 #endif
 
 
+/* Define if your assembler has fixed global_load functions. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_GCN_ASM_GLOBAL_LOAD_FIXED
+#endif
+
+
 /* Define to 1 if you have the `getchar_unlocked' function. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_GETCHAR_UNLOCKED
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 283a91f..54a1c0b 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -5481,13 +5481,22 @@ print_operand_address (FILE *file, rtx mem)
  if (vgpr_offset == NULL_RTX)
/* In this case, the vector offset is zero, so we use the first
   lane of v1, which is initialized to zero.  */
-   fprintf (file, "v[1:2]");
+   {
+ if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
+   fprintf (file, "v1");
+ else
+   fprintf (file, "v[1:2]");
+   }
  else if (REG_P (vgpr_offset)
   && VGPR_REGNO_P (REGNO (vgpr_offset)))
{
- fprintf (file, "v[%d:%d]",
-  REGNO (vgpr_offset) - FIRST_VGPR_REG,
-  REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
+ if (HAVE_GCN_ASM_GLOBAL_LOAD_FIXED)
+   fprintf (file, "v%d",
+REGNO (vgpr_offset) - FIRST_VGPR_REG);
+ else
+   fprintf (file, "v[%d:%d]",
+REGNO (vgpr_offset) - FIRST_VGPR_REG,
+REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
}
  else
output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
diff --git a/gcc/configure b/gcc/configure
index 4a9e4fa..dd0194a 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28909,6 +28909,33 @@ case "$target" in
 ;;
 esac
 
+# This tests if the assembler supports two registers for global_load functions
+# (like in LLVM versions <12) or one register (like in LLVM 12).
+case "$target" in
+  amdgcn-* | gcn-*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler fix for 
global_load functions" >&5
+$as_echo_n "checking assembler fix for global_load functions... " >&6; }
+gcc_cv_as_gcn_global_load_fixed=yes
+if test x$gcc_cv_as != x; then
+  cat > conftest.s < /dev/null 2>&1; then
+gcc_cv_as_gcn_global_load_fixed=no
+  fi
+  rm -f conftest.s conftest.o conftest
+fi
+global_load_fixed=`if test x$gcc_cv_as_gcn_global_load_fixed = xyes; then 
echo 1; else echo 0; fi`
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_GCN_ASM_GLOBAL_LOAD_FIXED $global_load_fixed
+_ACEOF
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_gcn_global_load_fixed" >&5
+$as_echo "$gcc_cv_as_gcn_global_load_fixed" >&6; }
+;;
+esac
+
 # ??? Not all targets support dwarf2 debug_line, even within a version
 # of gas.  Moreover, we need to emit a valid instruction to trigger any
 # info to the output file.  So, as supported targets are added to gas 2.11,
diff --git a/gcc/configure.ac 

Re: [PATCH] gcc/configure.ac: fix register issue for global_load assembler functions

2021-06-16 Thread Marcel Vollweiler

Changed the variable "gcc_cv_as_global_load_fixed" into
"gcc_cv_as_gcn_global_load_fixed" in order to have the "gcn" substring
also in the config.patch file.


Am 09.06.2021 um 16:47 schrieb Marcel Vollweiler:

This patch fixes an issue with global_load assembler functions leading
to a "invalid operand for instruction" error since in different LLVM
versions those functions use either one or two registers.

In this patch a compatibility check is added to the configure.ac.

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung,
Frank Thürauf


-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
gcc/configure.ac: Adapt configuration according to assembler fix of global_load 
functions.

gcc/ChangeLog:

* config.in: Regenerate.
* config/gcn/gcn.c (print_operand_address): Fix for global_load 
assembler
functions.
* configure: Regenerate.
* configure.ac: Fix for global_load assembler functions. 

diff --git a/gcc/config.in b/gcc/config.in
index e54f59c..18e6271 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -1431,6 +1431,12 @@
 #endif
 
 
+/* Define if your assembler has fixed global_load functions. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_GCN_ASM_GLOBAL_LOAD_FIXED
+#endif
+
+
 /* Define to 1 if you have the `getchar_unlocked' function. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_GETCHAR_UNLOCKED
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 283a91f..2d27296 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -5481,13 +5481,24 @@ print_operand_address (FILE *file, rtx mem)
  if (vgpr_offset == NULL_RTX)
/* In this case, the vector offset is zero, so we use the first
   lane of v1, which is initialized to zero.  */
-   fprintf (file, "v[1:2]");
+   {
+#if HAVE_GCN_ASM_GLOBAL_LOAD_FIXED == 1
+   fprintf (file, "v1"); 
+#else
+   fprintf (file, "v[1:2]");
+#endif
+   }
  else if (REG_P (vgpr_offset)
   && VGPR_REGNO_P (REGNO (vgpr_offset)))
{
- fprintf (file, "v[%d:%d]",
-  REGNO (vgpr_offset) - FIRST_VGPR_REG,
-  REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
+#if HAVE_GCN_ASM_GLOBAL_LOAD_FIXED == 1
+   fprintf (file, "v%d",
+REGNO (vgpr_offset) - FIRST_VGPR_REG);
+#else
+   fprintf (file, "v[%d:%d]",
+REGNO (vgpr_offset) - FIRST_VGPR_REG,
+REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
+#endif
}
  else
output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
diff --git a/gcc/configure b/gcc/configure
index 4a9e4fa..8843a8f 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28909,6 +28909,36 @@ case "$target" in
 ;;
 esac
 
+# This tests if the assembler supports two registers for global_load functions
+# (like in LLVM versions <12) or one register (like in LLVM 12).
+case "$target" in
+  amdgcn-* | gcn-*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler fix for 
global_load functions" >&5
+$as_echo_n "checking assembler fix for global_load functions... " >&6; }
+gcc_cv_as_gcn_global_load_fixed=yes
+if test x$gcc_cv_as != x; then
+  cat > conftest.s < /dev/null 2>&1; then
+gcc_cv_as_gcn_global_load_fixed=no
+  fi
+  rm -f conftest.s conftest.o conftest
+fi
+if test x$gcc_cv_as_gcn_global_load_fixed = xyes; then
+
+$as_echo "#define HAVE_GCN_ASM_GLOBAL_LOAD_FIXED 1" >>confdefs.h
+
+else
+
+$as_echo "#define HAVE_GCN_ASM_GLOBAL_LOAD_FIXED 0" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_gcn_global_load_fixed" >&5
+$as_echo "$gcc_cv_as_gcn_global_load_fixed" >&6; }
+;;
+esac
+
 # ??? Not all targets support dwarf2 debug_line, even within a version
 # of gas.  Moreover, we need to emit a valid instruction to trigger any
 # info to the output file.  So, as supported targets are added to gas 2.11,
diff --git a/gcc/configure.ac b/gcc/configure.ac
index d9fc3c2..e179ce1 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5357,6 +5357,30 @@ case "$target" in
 ;;
 esac
 
+# This tests if the assembler supports two registers for global_load functions
+# (like in LLVM versions <12) or one register (like in LLVM 12).
+case "$target" in
+  amdgcn-* | gcn-*)
+AC_MSG_CH

[PATCH] gcc/configure.ac: fix register issue for global_load assembler functions

2021-06-09 Thread Marcel Vollweiler

This patch fixes an issue with global_load assembler functions leading
to a "invalid operand for instruction" error since in different LLVM
versions those functions use either one or two registers.

In this patch a compatibility check is added to the configure.ac.

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
gcc/ChangeLog: adapt configuration according to assembler fix of global_load 
functions.

* config.in: Regenerate.
* config/gcn/gcn.c (print_operand_address): Fix for global_load 
assembler
functions.
* configure: Regenerate.
* configure.ac: Fix for global_load assembler functions. 

diff --git a/gcc/config.in b/gcc/config.in
index e54f59c..18e6271 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -1431,6 +1431,12 @@
 #endif
 
 
+/* Define if your assembler has fixed global_load functions. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_GCN_ASM_GLOBAL_LOAD_FIXED
+#endif
+
+
 /* Define to 1 if you have the `getchar_unlocked' function. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_GETCHAR_UNLOCKED
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 283a91f..2d27296 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -5481,13 +5481,24 @@ print_operand_address (FILE *file, rtx mem)
  if (vgpr_offset == NULL_RTX)
/* In this case, the vector offset is zero, so we use the first
   lane of v1, which is initialized to zero.  */
-   fprintf (file, "v[1:2]");
+   {
+#if HAVE_GCN_ASM_GLOBAL_LOAD_FIXED == 1
+   fprintf (file, "v1"); 
+#else
+   fprintf (file, "v[1:2]");
+#endif
+   }
  else if (REG_P (vgpr_offset)
   && VGPR_REGNO_P (REGNO (vgpr_offset)))
{
- fprintf (file, "v[%d:%d]",
-  REGNO (vgpr_offset) - FIRST_VGPR_REG,
-  REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
+#if HAVE_GCN_ASM_GLOBAL_LOAD_FIXED == 1
+   fprintf (file, "v%d",
+REGNO (vgpr_offset) - FIRST_VGPR_REG);
+#else
+   fprintf (file, "v[%d:%d]",
+REGNO (vgpr_offset) - FIRST_VGPR_REG,
+REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
+#endif
}
  else
output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
diff --git a/gcc/configure b/gcc/configure
index 4a9e4fa..8e044c3 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28909,6 +28909,36 @@ case "$target" in
 ;;
 esac
 
+# This tests if the assembler supports two registers for global_load functions
+# (like in LLVM versions <12) or one register (like in LLVM 12).
+case "$target" in
+  amdgcn-* | gcn-*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler fix for 
global_load functions" >&5
+$as_echo_n "checking assembler fix for global_load functions... " >&6; }
+gcc_cv_as_global_load_fixed=yes
+if test x$gcc_cv_as != x; then
+  cat > conftest.s < /dev/null 2>&1; then
+gcc_cv_as_global_load_fixed=no
+  fi
+  rm -f conftest.s conftest.o conftest
+fi
+if test x$gcc_cv_as_global_load_fixed = xyes; then
+
+$as_echo "#define HAVE_GCN_ASM_GLOBAL_LOAD_FIXED 1" >>confdefs.h
+
+else
+
+$as_echo "#define HAVE_GCN_ASM_GLOBAL_LOAD_FIXED 0" >>confdefs.h
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_global_load_fixed" >&5
+$as_echo "$gcc_cv_as_global_load_fixed" >&6; }
+;;
+esac
+
 # ??? Not all targets support dwarf2 debug_line, even within a version
 # of gas.  Moreover, we need to emit a valid instruction to trigger any
 # info to the output file.  So, as supported targets are added to gas 2.11,
diff --git a/gcc/configure.ac b/gcc/configure.ac
index d9fc3c2..d7ea224 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5357,6 +5357,30 @@ case "$target" in
 ;;
 esac
 
+# This tests if the assembler supports two registers for global_load functions
+# (like in LLVM versions <12) or one register (like in LLVM 12).
+case "$target" in
+  amdgcn-* | gcn-*)
+AC_MSG_CHECKING(assembler fix for global_load functions)
+gcc_cv_as_global_load_fixed=yes
+if test x$gcc_cv_as != x; then
+  cat > conftest.s < /dev/null 2>&1; then
+gcc_cv_as_global_load_fixed=no
+  fi
+  rm -f conftest.s conftest.o conftest
+fi
+if test x$gcc_cv_as_global_load_fixed = xyes; then
+  AC_DEFINE(HAVE_GCN_ASM_GLOBAL_LOAD_FIXED, 1, [Define if your assembler 
has fixed global_load functions.])
+else
+  AC_DEFINE(HAVE_GCN_ASM_GLOBAL_LOAD_FIXED, 0, [Define if your assembler 
has fixed global_load functions.])
+fi
+AC_MSG_RESULT($gcc_cv_as_global_load_fixed)
+;;
+esac
+
 # ??? Not all targets support dwarf2 debug_line, even within a 

Re: [PATCH] Fortran/OpenMP: Add support for 'close' in map clause

2021-05-20 Thread Marcel Vollweiler

Hi Jakub,

Am 20.05.2021 um 10:57 schrieb Jakub Jelinek:

On Thu, May 20, 2021 at 10:47:52AM +0200, Marcel Vollweiler wrote:

--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -1710,10 +1710,21 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
   && gfc_match ("map ( ") == MATCH_YES)
 {
   locus old_loc2 = gfc_current_locus;
-  bool always = false;
+
+  int always = 0;
+  int close = 0;


The vertical space should be after the 3 variable declarations
rather than in between 1 and 2.


Changed.




+  for (;;)
+{
+  if (gfc_match ("always ") == MATCH_YES)
+always++;
+  else if (gfc_match ("close ") == MATCH_YES)
+close++;
+  else
+break;
+  gfc_match (", ");
+}
+
   gfc_omp_map_op map_op = OMP_MAP_TOFROM;
-  if (gfc_match ("always , ") == MATCH_YES)
-always = true;
   if (gfc_match ("alloc : ") == MATCH_YES)
 map_op = OMP_MAP_ALLOC;
   else if (gfc_match ("tofrom : ") == MATCH_YES)
@@ -1726,11 +1737,24 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
 map_op = OMP_MAP_RELEASE;
   else if (gfc_match ("delete : ") == MATCH_YES)
 map_op = OMP_MAP_DELETE;
-  else if (always)
+  else
 {
   gfc_current_locus = old_loc2;
-  always = false;
+  always = 0;
+  close = 0;
 }
+
+  if (always > 1)
+{
+  gfc_error ("too many % modifiers at %C");
+  break;
+}
+  if (close > 1)
+{
+  gfc_error ("too many % modifiers at %C");
+  break;


I think it would be nice to show the locus of the second always or close
modifier.  Could the loop above remember that locus when always++ == 1
(or ++always == 2) and similarly for close and use it when printing the
error?


Good point. I changed the loop and the error messages accordingly.


And similarly to the C/C++ patch, better use always_modifier and
close_modifier as the names of the variables, as close is a function and
could be defined as macro.


Changed.



  Jakub



Thanks!

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
Fortran/OpenMP: Add support for 'close' in map clause

gcc/fortran/ChangeLog: 

* openmp.c (gfc_match_omp_clauses): Support map-type-modifier 'close'.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/map-6.f90: New test.
* gfortran.dg/gomp/map-7.f90: New test.
* gfortran.dg/gomp/map-8.f90: New test.

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 7eeabff..f8d198e 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -1710,27 +1710,62 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
  && gfc_match ("map ( ") == MATCH_YES)
{
  locus old_loc2 = gfc_current_locus;
- bool always = false;
+ int always_modifier = 0;
+ int close_modifier = 0;
+ locus second_always_locus;
+ locus second_close_locus;
+
+ for (;;)
+   {
+ locus current_locus = gfc_current_locus;
+ if (gfc_match ("always ") == MATCH_YES)
+   {
+ if (always_modifier++ == 1)
+   second_always_locus = current_locus;
+   }
+ else if (gfc_match ("close ") == MATCH_YES)
+   {
+ if (close_modifier++ == 1)
+   second_close_locus = current_locus;
+   }
+ else
+   break;
+ gfc_match (", ");
+   }
+
  gfc_omp_map_op map_op = OMP_MAP_TOFROM;
- if (gfc_match ("always , ") == MATCH_YES)
-   always = true;
  if (gfc_match ("alloc : ") == MATCH_YES)
map_op = OMP_MAP_ALLOC;
  else if (gfc_match ("tofrom : ") == MATCH_YES)
-   map_op = always ? OMP_MAP_ALWAYS_TOFROM : OMP_MAP_TOFROM;
+   map_op = always_modifier ? OMP_MAP_ALWAYS_TOFROM : 
OMP_MAP_TOFROM;
  else if (gfc_match ("to : ") == MATCH_YES)
-   map_op = always ? OMP_MAP_ALWAYS_TO : OMP_MAP_TO;
+   map_op = always_modifier ? OMP_MAP_ALWAYS_TO : OMP_MAP_TO;
  else if (gfc_match ("from : ") == MATCH_YES)
-   map_op = always ? OMP_M

[PATCH] Fortran/OpenMP: Add support for 'close' in map clause

2021-05-20 Thread Marcel Vollweiler

Hi,

This patch adds handling for the map-type-modifier 'close' in the map
clause in the Fortran parser (for C and C++ parsers the changes were
already committed).

'close' was introduced with OpenMP 5.0: "The close map-type-modifier is
a hint to the runtime to allocate memory close to the target device."
In OpenMP 5.0 'close' can be used beside/together with 'always' in a
list of map-type-modifiers.

This patch also considers the optional commas in the modifier list,
which the old code did not (although the comma after 'always' was
already optional in OpenMP 4.5).

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
Fortran/OpenMP: Add support for 'close' in map clause

gcc/fortran/ChangeLog: 

* openmp.c (gfc_match_omp_clauses): Support map-type-modifier 'close'.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/map-6.f90: New test.
* gfortran.dg/gomp/map-7.f90: New test.
* gfortran.dg/gomp/map-8.f90: New test.

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 7eeabff..bec852a 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -1710,10 +1710,21 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
  && gfc_match ("map ( ") == MATCH_YES)
{
  locus old_loc2 = gfc_current_locus;
- bool always = false;
+
+ int always = 0;
+ int close = 0;
+ for (;;)
+   {
+ if (gfc_match ("always ") == MATCH_YES)
+   always++;
+ else if (gfc_match ("close ") == MATCH_YES)
+   close++;
+ else
+   break;
+ gfc_match (", ");
+   }
+
  gfc_omp_map_op map_op = OMP_MAP_TOFROM;
- if (gfc_match ("always , ") == MATCH_YES)
-   always = true;
  if (gfc_match ("alloc : ") == MATCH_YES)
map_op = OMP_MAP_ALLOC;
  else if (gfc_match ("tofrom : ") == MATCH_YES)
@@ -1726,11 +1737,24 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
map_op = OMP_MAP_RELEASE;
  else if (gfc_match ("delete : ") == MATCH_YES)
map_op = OMP_MAP_DELETE;
- else if (always)
+ else
{
  gfc_current_locus = old_loc2;
- always = false;
+ always = 0;
+ close = 0;
}
+
+ if (always > 1)
+   {
+ gfc_error ("too many % modifiers at %C");
+ break;
+   }
+ if (close > 1)
+   {
+ gfc_error ("too many % modifiers at %C");
+ break;
+   }
+
  head = NULL;
  if (gfc_match_omp_variable_list ("", >lists[OMP_LIST_MAP],
   false, NULL, ,
@@ -1741,8 +1765,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
omp_mask mask,
n->u.map_op = map_op;
  continue;
}
- else
-   gfc_current_locus = old_loc;
+ gfc_current_locus = old_loc;
+ break;
}
  if ((mask & OMP_CLAUSE_MERGEABLE) && !c->mergeable
  && gfc_match ("mergeable") == MATCH_YES)
diff --git a/gcc/testsuite/gfortran.dg/gomp/map-6.f90 
b/gcc/testsuite/gfortran.dg/gomp/map-6.f90
new file mode 100644
index 000..309f845
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/map-6.f90
@@ -0,0 +1,50 @@
+! { dg-additional-options "-fdump-tree-original" }
+
+implicit none
+
+integer :: a, b, b1, b2, b3, b4, b5, b6
+
+!$omp target map(a)
+!$omp end target
+
+!$omp target map(to : a)
+!$omp end target
+
+!$omp target map(always to: a)
+!$omp end target
+!$omp target map(always, to: a)
+!$omp end target
+!$omp target map(close to: a)
+!$omp end target
+!$omp target map(close, to: a)
+!$omp end target
+
+!$omp target map(close always to:b1)
+!$omp end target
+!$omp target map(close, always to:b2)
+!$omp end target
+!$omp target map(close, always, to:b3)
+!$omp end target
+!$omp target map(always close to:b4)
+!$omp end target
+!$omp target map(always, close to:b5)
+!$omp end target
+!$omp target map(always, close, to:b6)
+!$omp end target
+
+
+!$omp target map (always to : a) map (close to : b)
+!$omp end target
+
+end
+
+! { dg-final { scan-tree-dump-not "map\\(\[^\n\r)]*close\[^\n\r)]*to:" 
"original" } }
+
+! { dg-final { scan-tree-dump-times "#pragma omp target map\\(always,to:" 9 
"original" } }
+
+! { dg-final { scan-tree-dump "#pragma omp target map\\(always,to:b1\\)" 
"original" } }
+! { dg-final { scan-tree-dump "#pragma omp target map\\(always,to:b2\\)" 
"original" } }
+! { dg-final { scan-tree-dump "#pragma omp target 

Re: [Committed] MAINTAINERS: Add myself for write after approval

2021-05-12 Thread Marcel Vollweiler

Am 12.05.2021 um 19:37 schrieb Marcel Vollweiler:

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung,
Frank Thürauf


-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
commit 8982a5354d2697eeb12a77d49b6730db90053618
Author: Marcel Vollweiler 
Date:   Wed May 12 10:14:41 2021 -0700

MAINTAINERS: Add myself for write after approval

ChangeLog:

2021-05-12  Marcel Vollweiler  

* MAINTAINERS (Write After Approval): Add myself.

diff --git a/MAINTAINERS b/MAINTAINERS
index 44a51a7..5b10f21 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -643,6 +643,7 @@ Ilya Verbin 

 Andre Vieira   
 Rasmus Villemoes   
 Kugan Vivekanandarajah 
+Marcel Vollweiler  
 Ville Voutilainen  
 Nenad Vukicevic
 Feng Wang  


[Committed] MAINTAINERS: Add myself for write after approval

2021-05-12 Thread Marcel Vollweiler

-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
commit 8982a5354d2697eeb12a77d49b6730db90053618
Author: Marcel Vollweiler 
Date:   Wed May 12 10:14:41 2021 -0700

MAINTAINERS: Add myself for write after approval

ChangeLog:

2021-05-12  Marcel Vollweiler  

* MAINTAINERS (Write After Approval): Add myself.


Re: [PATCH] OpenMP: Add support for 'close' in map clause

2021-05-12 Thread Marcel Vollweiler


Am 11.05.2021 um 17:20 schrieb Jakub Jelinek:

On Tue, May 11, 2021 at 04:27:55PM +0200, Marcel Vollweiler wrote:

The usual wording would be
"too many % modifiers"



Changed for 'always' and 'close' for C and C++.


One extra thing, sorry, forgot to mention, for the translators it might be
better to use "too many %qs modifiers", "always" (or, "close").
That way they can translate it just once instead of twice.


IMHO you should at least check that tok->type == CPP_NAME before
checking pos + 1 token's type, you don't want to skip over CPP_EOF,
CPP_PRAGMA_EOF, or even CPP_CLOSE_PAREN etc.
Perhaps by adding
if (tok->type != CPP_NAME)
   break;
right after c_token *tok = c_parser_peek_nth_token_raw (parser, pos); ?


The check of the token's type at position 'pos' is done in the condition
of the while loop, that means
'c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COLON'
is only reached when
'c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME'
holds (since 'pos' is not changed in between).


You're right.


And, IMHO something should clear always and close (btw, might be better
to use close_modifier as variable name and for consistency always_modifier)
unless we reach the CPP_COLON case.



Good point, I agree with both. Cleared and renamed :)


I think the clearing is still insufficient.
It will clear on e.g. map (always, close, foobar)
but not on map (always) or map (always, close)
because in that case the loop terminates by the while loop condition
no longer being true.


That's true. This is modified together with the diagnostics (see below).



And there is another thing I have missed (and again should be in the
testsuite):
map (always, always)
or
map (always, close, close)
etc. will with the patch diagnose that too many 'always' modifiers
(or 'close'), but that isn't correct diagnostic, there aren't any
modifiers, but the same variable is mapped multiple times.

So, one possibility is to remember details like:
potential always modifier has been seen
potential always modifier has been seen more than once
potential close modifier has been seen
potential close modifier has been seen more than once
and only when seeing the colon enact them and diagnose too many modifiers
(but then not with cp_parser_error but error with a location_t of one of the
modifiers), e.g. always_modifier == -1 would mean 1 potential has been seen,
== -2 more than one potential and == 1 it was modifier.

Or another one is not to do much in the first raw token lookup loop,
just check if it is a sequence of
always
close
,
tokens followed by
CPP_NAME (other than always, close) + CPP_CLONE combo
and in that case just set a bool flag that map-kind is present,
but don't consume any tokens.
And then in another loop if that bool flag is set, lookup non-raw
tokens and parse them, setting flags, doing diagnostics etc.
Basically do the look-ahead only to check if it is
map (var1, var2, ...)
case
or
map (modifiers map-kind: var1, var2, ...)
case.


I changed the patch similar to your second approach. I.e., use a first
loop to check if and where a potential map-type is given. In the second
loop (which is only entered if a potential map-type exists) the tokens
are consumed and diagnostic is applied.

I avoided the diagnostic for the variables (besides the modifier) at
this place, since this should continue to be done in
'c_parser_omp_variable_list' / 'cp_parser_omp_var_list_no_open' from my
point of view.



  Jakub



The new version of the patch is attached.

Thanks,

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
OpenMP: Add support for 'close' in map clause

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_map): Support map-type-modifier 
'close'.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_map): Support map-type-modifier 
'close'.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/map-6.c: New test.
* c-c++-common/gomp/map-7.c: New test.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 5cdeb21..c7f3f18 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -15643,54 +15643,83 @@ c_parser_omp_clause_depend (c_parser *parser, tree 
list)
map-kind:
  alloc | to | from | tofrom | release | delete
 
-   map ( always [,] map-kind: variable-list ) */
+   map ( always [,] map-kind: variable-list )
+
+   OpenMP 5.0:
+   map ( [map-type-modifier[,] ...] map-kind: variable-list )
+
+   map-type-modifier:
+ always | close */
 
 static tree
 c_parser_omp_clause_map (c_parser *parser, tree list)
 {
   location_t clause_loc = c_parser_peek_token (parser)->location;
   enum gomp_map_kind kind = GOMP_MAP_TOFROM;
-  int always = 0;
-  enum c_id_kind always_id_kind = C_ID_NONE;
-  location_t always_loc = UNKNOWN_LOCATION;
-  tree always_id 

Re: [PATCH] OpenMP: Add support for 'close' in map clause

2021-05-11 Thread Marcel Vollweiler


Am 10.05.2021 um 20:34 schrieb Jakub Jelinek:

On Mon, May 10, 2021 at 04:11:39PM +0200, Marcel Vollweiler wrote:

@@ -15660,37 +15665,54 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
if (!parens.require_open (parser))
  return list;

-  if (c_parser_next_token_is (parser, CPP_NAME))
+  int always = 0;
+  int close = 0;
+  int pos = 1;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME)


Nice, totally missed that Joseph has added this.


  {
-  c_token *tok = c_parser_peek_token (parser);
+  c_token *tok = c_parser_peek_nth_token_raw (parser, pos);
const char *p = IDENTIFIER_POINTER (tok->value);
-  always_id_kind = tok->id_kind;
-  always_loc = tok->location;
-  always_id = tok->value;
if (strcmp ("always", p) == 0)
 {
-  c_token *sectok = c_parser_peek_2nd_token (parser);
-  if (sectok->type == CPP_COMMA)
+  if (always)
 {
-  c_parser_consume_token (parser);
-  c_parser_consume_token (parser);
-  always = 2;
+  c_parser_error (parser, "expected modifier % only once");


The usual wording would be
"too many % modifiers"



Changed for 'always' and 'close' for C and C++.


+  parens.skip_until_found_close (parser);
+  return list;
+}
+
+  always_id_kind = tok->id_kind;
+  always_loc = tok->location;
+  always_id = tok->value;


But you don't need any of the always_{id_kind,loc,id} variables anymore,
so they should be removed and everything that touches them too.



That's true. I removed them.


+
+  always++;
+}
+  else if (strcmp ("close", p) == 0)
+{
+  if (close)
+{
+  c_parser_error (parser, "expected modifier % only once");


Similarly.


+  parens.skip_until_found_close (parser);
+  return list;
 }
-  else if (sectok->type == CPP_NAME)
+
+  close++;
+}
+  else if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == 
CPP_COLON)


IMHO you should at least check that tok->type == CPP_NAME before
checking pos + 1 token's type, you don't want to skip over CPP_EOF,
CPP_PRAGMA_EOF, or even CPP_CLOSE_PAREN etc.
Perhaps by adding
   if (tok->type != CPP_NAME)
  break;
right after c_token *tok = c_parser_peek_nth_token_raw (parser, pos); ?


The check of the token's type at position 'pos' is done in the condition
of the while loop, that means
   'c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COLON'
is only reached when
   'c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME'
holds (since 'pos' is not changed in between).




+{
+  for (int i = 1; i < pos; ++i)
 {
-  p = IDENTIFIER_POINTER (sectok->value);
-  if (strcmp ("alloc", p) == 0
-  || strcmp ("to", p) == 0
-  || strcmp ("from", p) == 0
-  || strcmp ("tofrom", p) == 0
-  || strcmp ("release", p) == 0
-  || strcmp ("delete", p) == 0)
-{
-  c_parser_consume_token (parser);
-  always = 1;
-}
+  c_parser_peek_token(parser);


Formatting, space before (



Corrected.


+  c_parser_consume_token (parser);
 }
+  break;


And, IMHO something should clear always and close (btw, might be better
to use close_modifier as variable name and for consistency always_modifier)
unless we reach the CPP_COLON case.



Good point, I agree with both. Cleared and renamed :)


Because we don't want
   map (always, close)
to imply
   map (always, close, tofrom: always, close)
but
   map (tofrom: always, close)
and my reading of your changes suggests that we actually use the
*_ALWAYS* kinds in that case.


+  cp_parser_error (parser,
+   "expected modifier % only once");


See above.


+  cp_parser_skip_to_closing_parenthesis (parser,
+ /*recovering=*/true,
+ /*or_comma=*/false,
+ /*consume_paren=*/true);
+  return list;
+}
+
+  always = true;
+}
+  else if (strcmp ("close", p) == 0)
+{
+  if (close)
+{
+  cp_parser_error (parser,
+   "expected modifier % only once");


Likewise.


+  else if (cp_lexer_peek_nth_token (parser->lexer, pos + 1)->type
+   == CPP_COLON)
+{
+  for (int i = 1; i < pos; ++i)
+cp_lexer_consume_token (parser->lexer);
+  break;
+}
+  else
+break;
+
+  if (cp_lexer_peek_nth_token (parser->lexer, pos + 1)->type == CPP_COMMA)
+pos++;
+  pos++;
  }


Again, I don't see anything that would clear alw

[PATCH] OpenMP: Add support for 'close' in map clause

2021-05-10 Thread Marcel Vollweiler

Hi,

This patch adds handling for the map-type-modifier 'close' in the map
clause that was introduced with OpenMP 5.0: "The close map-type-modifier
is a hint to the runtime to allocate memory close to the target device."

In OpenMP 5.0 'close' can be used beside/together with 'always' in a
list of map-type-modifiers.

With this patch, 'close' will be parsed and ignored for C and C++. A
patch for Fortran will be provided separately.

Marcel
-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
OpenMP: Add support for 'close' in map clause

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_map): Support map-type-modifier 
'close'.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_map): Support map-type-modifier 
'close'.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/map-6.c: New test.
* c-c++-common/gomp/map-7.c: New test.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 5cdeb21..78cba7f 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -15643,14 +15643,19 @@ c_parser_omp_clause_depend (c_parser *parser, tree 
list)
map-kind:
  alloc | to | from | tofrom | release | delete
 
-   map ( always [,] map-kind: variable-list ) */
+   map ( always [,] map-kind: variable-list )
+
+   OpenMP 5.0:
+   map ( [map-type-modifier[,] ...] map-kind: variable-list )
+
+   map-type-modifier:
+ always | close */
 
 static tree
 c_parser_omp_clause_map (c_parser *parser, tree list)
 {
   location_t clause_loc = c_parser_peek_token (parser)->location;
   enum gomp_map_kind kind = GOMP_MAP_TOFROM;
-  int always = 0;
   enum c_id_kind always_id_kind = C_ID_NONE;
   location_t always_loc = UNKNOWN_LOCATION;
   tree always_id = NULL_TREE;
@@ -15660,37 +15665,54 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
   if (!parens.require_open (parser))
 return list;
 
-  if (c_parser_next_token_is (parser, CPP_NAME))
+  int always = 0;
+  int close = 0;
+  int pos = 1;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME)
 {
-  c_token *tok = c_parser_peek_token (parser);
+  c_token *tok = c_parser_peek_nth_token_raw (parser, pos);
   const char *p = IDENTIFIER_POINTER (tok->value);
-  always_id_kind = tok->id_kind;
-  always_loc = tok->location;
-  always_id = tok->value;
   if (strcmp ("always", p) == 0)
{
- c_token *sectok = c_parser_peek_2nd_token (parser);
- if (sectok->type == CPP_COMMA)
+ if (always)
{
- c_parser_consume_token (parser);
- c_parser_consume_token (parser);
- always = 2;
+ c_parser_error (parser, "expected modifier % only once");
+ parens.skip_until_found_close (parser);
+ return list;
+   }
+
+ always_id_kind = tok->id_kind;
+ always_loc = tok->location;
+ always_id = tok->value;
+
+ always++;
+   }
+  else if (strcmp ("close", p) == 0)
+   {
+ if (close)
+   {
+ c_parser_error (parser, "expected modifier % only once");
+ parens.skip_until_found_close (parser);
+ return list;
}
- else if (sectok->type == CPP_NAME)
+
+ close++;
+   }
+  else if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == 
CPP_COLON)
+   {
+ for (int i = 1; i < pos; ++i)
{
- p = IDENTIFIER_POINTER (sectok->value);
- if (strcmp ("alloc", p) == 0
- || strcmp ("to", p) == 0
- || strcmp ("from", p) == 0
- || strcmp ("tofrom", p) == 0
- || strcmp ("release", p) == 0
- || strcmp ("delete", p) == 0)
-   {
- c_parser_consume_token (parser);
- always = 1;
-   }
+ c_parser_peek_token(parser);
+ c_parser_consume_token (parser);
}
+ break;
}
+  else
+   break;
+
+  if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COMMA)
+   pos++;
+  pos++;
 }
 
   if (c_parser_next_token_is (parser, CPP_NAME)
@@ -15719,35 +15741,6 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
   c_parser_consume_token (parser);
   c_parser_consume_token (parser);
 }
-  else if (always)
-{
-  if (always_id_kind != C_ID_ID)
-   {
- c_parser_error (parser, "expected identifier");
- parens.skip_until_found_close (parser);
- return list;
-   }
-
-  tree t = lookup_name (always_id);
-  if (t == NULL_TREE)
-   {
- undeclared_variable (always_loc, always_id);
- t = error_mark_node;
-   }
-  if (t != error_mark_node)
-   {
- tree u = build_omp_clause (clause_loc, OMP_CLAUSE_MAP);
- OMP_CLAUSE_DECL (u)