Hi!

On 2018-08-13T21:41:50+0100, Julian Brown <jul...@codesourcery.com> wrote:
> On Mon, 13 Aug 2018 11:42:26 -0700 Cesar Philippidis <ce...@codesourcery.com> 
> wrote:
>> On 08/13/2018 09:21 AM, Julian Brown wrote:
>> > diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c 
>> > b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
>> > new file mode 100644
>> > index 0000000..2fa708a
>> > --- /dev/null
>> > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
>> > @@ -0,0 +1,106 @@
>> > +/* { dg-xfail-run-if "gangprivate failure" { 
>> > openacc_nvidia_accel_selected } { "-O0" } { "" } } */

>> is the above xfail still necessary? It seems to xpass
>> for me on nvptx. However, I see this regression on the host:
>>
>> FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/loop-gwv-2.c
>> -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1  -O2  execution test

> Oops, this was the version of the patch I meant to post (and the one I
> tested). The XFAIL on loop-gwv-2.c isn't necessary, plus that test
> needed some other fixes to make it pass for NVPTX (it was written for
> GCN to start with).

As I found out later, this testcase actually does work without the
code changes (OpenACC privatization levels) that it's accompanying -- and
I don't actually see anything in the testcase that the code changes would
trigger for.  Maybe it was for some earlier revision of these code
changes?  Anyway, as it's all-PASS for all systems that I've tested on,
I've now pushed "Add 'libgomp.oacc-c-c++-common/loop-gwv-2.c'" to master
branch in commit 5a16fb19e7c4274f8dd9bbdd30d7d06fe2eff8af, see attached.


Grüße
 Thomas


-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
From 5a16fb19e7c4274f8dd9bbdd30d7d06fe2eff8af Mon Sep 17 00:00:00 2001
From: Julian Brown <jul...@codesourcery.com>
Date: Mon, 13 Aug 2018 21:41:50 +0100
Subject: [PATCH] Add 'libgomp.oacc-c-c++-common/loop-gwv-2.c'

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New.
---
 .../libgomp.oacc-c-c++-common/loop-gwv-2.c    | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
new file mode 100644
index 00000000000..a4f81a39e24
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <alloca.h>
+#include <string.h>
+#include <gomp-constants.h>
+#include <stdlib.h>
+
+#if 0 /* Change to 1 to print every counter that 'check' inspects.  */
+#define DEBUG(DIM, IDX, VAL) \
+  fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL))
+#else
+#define DEBUG(DIM, IDX, VAL)
+#endif
+
+#define N (32*32*32) /* Element count of 'ary' and trip count of the loops.  */
+/* Return 0 iff each of the DIMSIZE counts in DIST is about N / DIMSIZE.  */
+int
+check (const char *dim, int *dist, int dimsize)
+{
+  int ix;
+  int exit = 0;  /* Result; becomes 1 once any count is out of range.  */
+  /* DIM only labels the output.  The bounds are computed in 'double'.  */
+  for (ix = 0; ix < dimsize; ix++)
+    {
+      DEBUG(dim, ix, dist[ix]);
+      if (dist[ix] < (N) / (dimsize + 0.5)
+	  || dist[ix] > (N) / (dimsize - 0.5))
+	{
+	  fprintf (stderr, "did not distribute to %ss (%d not between %d "
+		   "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)),
+		   (int) ((N) / (dimsize - 0.5)));
+	  exit |= 1;  /* No early exit: every bad count gets reported.  */
+	}
+    }
+
+  return exit;
+}
+/* Check that a combined gang/worker/vector loop spreads its iterations.  */
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int gangsize = 0, workersize = 0, vectorsize = 0;
+  int *gangdist, *workerdist, *vectordist;
+  /* Fill 'ary' with -1 before the parallel region overwrites it.  */
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+	    copy(ary) copyout(gangsize, workersize, vectorsize)
+  {
+#pragma acc loop gang worker vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int g, w, v;
+	/* IDs of this iteration within the gang, worker and vector levels.  */
+	g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+	w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+	v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+	/* Pack the IDs into one int: gang << 16, worker << 8, vector.  */
+	ary[ix] = (g << 16) | (w << 8) | v;
+      }
+    /* Sizes of the three levels, copied out for the host-side check.  */
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+  }
+  /* One zero-initialized counter per gang, worker and vector index.  */
+  gangdist = (int *) alloca (gangsize * sizeof (int));
+  workerdist = (int *) alloca (workersize * sizeof (int));
+  vectordist = (int *) alloca (vectorsize * sizeof (int));
+  memset (gangdist, 0, gangsize * sizeof (int));
+  memset (workerdist, 0, workersize * sizeof (int));
+  memset (vectordist, 0, vectorsize * sizeof (int));
+
+  /* Test that work is shared approximately equally amongst each active
+     gang/worker/vector.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int g = (ary[ix] >> 16) & 255;  /* Unpack what the loop stored.  */
+      int w = (ary[ix] >> 8) & 255;
+      int v = ary[ix] & 255;
+
+      gangdist[g]++;  /* NOTE(review): g/w/v are not checked against sizes.  */
+      workerdist[w]++;
+      vectordist[v]++;
+    }
+  /* Any failing dimension makes the exit status nonzero.  */
+  exit = check ("gang", gangdist, gangsize);
+  exit |= check ("worker", workerdist, workersize);
+  exit |= check ("vector", vectordist, vectorsize);
+
+  return exit;
+}
-- 
2.30.2

Reply via email to