Re: Runtime checking of OpenACC parallelism dimensions clauses

2017-05-23 Thread Thomas Schwinge
Hi!

On Tue, 23 May 2017 10:25:12 +0200, Jakub Jelinek <ja...@redhat.com> wrote:
> On Thu, May 11, 2017 at 02:24:05PM +0200, Thomas Schwinge wrote:
> > OK for trunk?

> > Runtime checking of OpenACC parallelism dimensions clauses

> Ok.

Thanks.  As posted, committed to trunk in r248358:

commit 681ad5cef0c3153f1233ef178c01ad53e7b9c405
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue May 23 09:16:05 2017 +0000

    Runtime checking of OpenACC parallelism dimensions clauses

libgomp/
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
* testsuite/lib/libgomp.exp
(check_effective_target_openacc_nvidia_accel_configured): New
proc.
* testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
(check_effective_target_c++): New procs.
* testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
(check_effective_target_c++): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@248358 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog  |  11 +
 libgomp/testsuite/lib/libgomp.exp  |  12 +
 libgomp/testsuite/libgomp.oacc-c++/c++.exp |   7 +
 .../libgomp.oacc-c-c++-common/parallel-dims.c  | 523 -
 libgomp/testsuite/libgomp.oacc-c/c.exp |   7 +
 5 files changed, 548 insertions(+), 12 deletions(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index 8209f9f..8fd5f07 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,14 @@
+2017-05-23  Thomas Schwinge  <tho...@codesourcery.com>
+
+   * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
+   * testsuite/lib/libgomp.exp
+   (check_effective_target_openacc_nvidia_accel_configured): New
+   proc.
+   * testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
+   (check_effective_target_c++): New procs.
+   * testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
+   (check_effective_target_c++): Likewise.
+
 2017-05-22  Jakub Jelinek  <ja...@redhat.com>
 
PR middle-end/80809
diff --git libgomp/testsuite/lib/libgomp.exp libgomp/testsuite/lib/libgomp.exp
index 501a860..ea3da2c 100644
--- libgomp/testsuite/lib/libgomp.exp
+++ libgomp/testsuite/lib/libgomp.exp
@@ -359,6 +359,18 @@ proc check_effective_target_offload_device_shared_as { } {
 } ]
 }
 
+# Return 1 if configured for nvptx offloading.
+
+proc check_effective_target_openacc_nvidia_accel_configured { } {
+global offload_targets
+if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
+return 0
+}
+# PR libgomp/65099: Currently, we only support offloading in 64-bit
+# configurations.
+return [is-effective-target lp64]
+}
+
 # Return 1 if at least one nvidia board is present.
 
 proc check_effective_target_openacc_nvidia_accel_present { } {
diff --git libgomp/testsuite/libgomp.oacc-c++/c++.exp libgomp/testsuite/libgomp.oacc-c++/c++.exp
index 608b298..9beadd6 100644
--- libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -4,6 +4,13 @@ load_lib libgomp-dg.exp
 load_gcc_lib gcc-dg.exp
 load_gcc_lib torture-options.exp
 
+proc check_effective_target_c { } {
+return 0
+}
+proc check_effective_target_c++ { } {
+return 1
+}
+
 global shlib_ext
 
 set shlib_ext [get_shlib_extension]
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index f5766a4..d8af546 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -1,25 +1,524 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
+   vector_length.  */
+
+#include <limits.h>
+#include <openacc.h>
+
+/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
+   not behaving as expected for -O0.  */
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()

Re: Runtime checking of OpenACC parallelism dimensions clauses

2017-05-23 Thread Jakub Jelinek
On Thu, May 11, 2017 at 02:24:05PM +0200, Thomas Schwinge wrote:
> Hi!
> 
> OK for trunk?
> 
> commit 0ba48b4faf85420fbe12971afdd6e0afe70778bb
> Author: Thomas Schwinge <tho...@codesourcery.com>
> Date:   Fri May 5 16:41:59 2017 +0200
> 
> Runtime checking of OpenACC parallelism dimensions clauses
> 
> libgomp/
> * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
> * testsuite/lib/libgomp.exp
> (check_effective_target_openacc_nvidia_accel_configured): New
> proc.
> * testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
> (check_effective_target_c++): New procs.
> * testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
> (check_effective_target_c++): Likewise.

Ok.

Jakub


Re: Runtime checking of OpenACC parallelism dimensions clauses

2017-05-19 Thread Thomas Schwinge
Hi!

Ping.

On Thu, 11 May 2017 14:24:05 +0200, I wrote:
> OK for trunk?
> 
> commit 0ba48b4faf85420fbe12971afdd6e0afe70778bb
> Author: Thomas Schwinge <tho...@codesourcery.com>
> Date:   Fri May 5 16:41:59 2017 +0200
> 
> Runtime checking of OpenACC parallelism dimensions clauses
> 
> libgomp/
> * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
> * testsuite/lib/libgomp.exp
> (check_effective_target_openacc_nvidia_accel_configured): New
> proc.
> * testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
> (check_effective_target_c++): New procs.
> * testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
> (check_effective_target_c++): Likewise.
> ---
>  libgomp/testsuite/lib/libgomp.exp  |  12 +
>  libgomp/testsuite/libgomp.oacc-c++/c++.exp |   7 +
>  .../libgomp.oacc-c-c++-common/parallel-dims.c  | 523 
> -
>  libgomp/testsuite/libgomp.oacc-c/c.exp |   7 +
>  4 files changed, 537 insertions(+), 12 deletions(-)
> 
> diff --git libgomp/testsuite/lib/libgomp.exp libgomp/testsuite/lib/libgomp.exp
> index 5e47872..62ee2e3 100644
> --- libgomp/testsuite/lib/libgomp.exp
> +++ libgomp/testsuite/lib/libgomp.exp
> @@ -358,6 +358,18 @@ proc check_effective_target_offload_device_shared_as { } {
>  } ]
>  }
>  
> +# Return 1 if configured for nvptx offloading.
> +
> +proc check_effective_target_openacc_nvidia_accel_configured { } {
> +global offload_targets
> +if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
> +return 0
> +}
> +# PR libgomp/65099: Currently, we only support offloading in 64-bit
> +# configurations.
> +return [is-effective-target lp64]
> +}
> +
>  # Return 1 if at least one nvidia board is present.
>  
>  proc check_effective_target_openacc_nvidia_accel_present { } {
> diff --git libgomp/testsuite/libgomp.oacc-c++/c++.exp libgomp/testsuite/libgomp.oacc-c++/c++.exp
> index 608b298..9beadd6 100644
> --- libgomp/testsuite/libgomp.oacc-c++/c++.exp
> +++ libgomp/testsuite/libgomp.oacc-c++/c++.exp
> @@ -4,6 +4,13 @@ load_lib libgomp-dg.exp
>  load_gcc_lib gcc-dg.exp
>  load_gcc_lib torture-options.exp
>  
> +proc check_effective_target_c { } {
> +return 0
> +}
> +proc check_effective_target_c++ { } {
> +return 1
> +}
> +
>  global shlib_ext
>  
>  set shlib_ext [get_shlib_extension]
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> index f5766a4..d8af546 100644
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> @@ -1,25 +1,524 @@
> -/* { dg-do run { target openacc_nvidia_accel_selected } } */
> +/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
> +   vector_length.  */
> +
> +#include <limits.h>
> +#include <openacc.h>
> +
> +/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
> +   not behaving as expected for -O0.  */
> +#pragma acc routine seq
> +static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
> +{
> +  if (acc_on_device ((int) acc_device_host))
> +return 0;
> +  else if (acc_on_device ((int) acc_device_nvidia))
> +{
> +  unsigned int r;
> +  asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
> +  return r;
> +}
> +  else
> +__builtin_abort ();
> +}
> +
> +#pragma acc routine seq
> +static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
> +{
> +  if (acc_on_device ((int) acc_device_host))
> +return 0;
> +  else if (acc_on_device ((int) acc_device_nvidia))
> +{
> +  unsigned int r;
> +  asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
> +  return r;
> +}
> +  else
> +__builtin_abort ();
> +}
> +
> +#pragma acc routine seq
> +static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
> +{
> +  if (acc_on_device ((int) acc_device_host))
> +return 0;
> +  else if (acc_on_device ((int) acc_device_nvidia))
> +{
> +  unsigned int r;
> +  asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
> +  return r;
> +}
> +  else
> +__builtin_abort ();
> +}
>  
> -/* Worker and vector size checks.  Picked an outrageously large
> -   value. */
>  
>  int main ()
>  {
> -  int dummy[10];
> +  acc_init (acc_device_default);
>  
> -#pragma

Re: Runtime checking of OpenACC parallelism dimensions clauses

2017-05-14 Thread Thomas Schwinge
Hi!

On Thu, 11 May 2017 14:24:05 +0200, I wrote:
> OK for trunk?

> Runtime checking of OpenACC parallelism dimensions clauses

For now, committed to gomp-4_0-branch in r248030:

commit 59e5204e0ec16c0f14ec68148f856fd307ef8d51
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Sun May 14 10:25:46 2017 +0000

Runtime checking of OpenACC parallelism dimensions clauses

libgomp/
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
* testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
(check_effective_target_c++): New procs.
* testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
(check_effective_target_c++): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@248030 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp |   8 +
 libgomp/testsuite/libgomp.oacc-c++/c++.exp |   7 +
 .../libgomp.oacc-c-c++-common/parallel-dims.c  | 526 -
 libgomp/testsuite/libgomp.oacc-c/c.exp |   7 +
 4 files changed, 536 insertions(+), 12 deletions(-)

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index def0feb..a1627a8 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,11 @@
+2017-05-14  Thomas Schwinge  <tho...@codesourcery.com>
+
+   * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
+   * testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
+   (check_effective_target_c++): New procs.
+   * testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
+   (check_effective_target_c++): Likewise.
+
 2017-05-12  Cesar Philippidis  <ce...@codesourcery.com>
 
* testsuite/libgomp.oacc-c-c++-common/par-reduction-3.c: New test.
diff --git libgomp/testsuite/libgomp.oacc-c++/c++.exp libgomp/testsuite/libgomp.oacc-c++/c++.exp
index ba1a28e..695b96d 100644
--- libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -4,6 +4,13 @@ load_lib libgomp-dg.exp
 load_gcc_lib gcc-dg.exp
 load_gcc_lib torture-options.exp
 
+proc check_effective_target_c { } {
+return 0
+}
+proc check_effective_target_c++ { } {
+return 1
+}
+
 global shlib_ext
 
 set shlib_ext [get_shlib_extension]
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index f5766a4..3458757 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -1,25 +1,527 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
+   vector_length.  */
+
+/* { dg-additional-options "-foffload-force" } */
+
+#include <limits.h>
+#include <openacc.h>
+
+/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
+   not behaving as expected for -O0.  */
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
 
-/* Worker and vector size checks.  Picked an outrageously large
-   value. */
 
 int main ()
 {
-  int dummy[10];
+  acc_init (acc_device_default);
 
-#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */
+  /* Non-positive value.  */
+
+  /* GR, WS, VS.  */
+  {
+#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
+int gangs_actual = GANGS;
+int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
+gangs_min = workers_min = vectors_min = INT_MAX;
+gangs_max = workers_max = vectors_max = INT_MIN;
+#pragma acc parallel copy (gangs_actual) \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: 
gangs_ma

Runtime checking of OpenACC parallelism dimensions clauses

2017-05-11 Thread Thomas Schwinge
Hi!

OK for trunk?

commit 0ba48b4faf85420fbe12971afdd6e0afe70778bb
Author: Thomas Schwinge <tho...@codesourcery.com>
Date:   Fri May 5 16:41:59 2017 +0200

Runtime checking of OpenACC parallelism dimensions clauses

libgomp/
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Rewrite.
* testsuite/lib/libgomp.exp
(check_effective_target_openacc_nvidia_accel_configured): New
proc.
* testsuite/libgomp.oacc-c++/c++.exp (check_effective_target_c)
(check_effective_target_c++): New procs.
* testsuite/libgomp.oacc-c/c.exp (check_effective_target_c)
(check_effective_target_c++): Likewise.
---
 libgomp/testsuite/lib/libgomp.exp  |  12 +
 libgomp/testsuite/libgomp.oacc-c++/c++.exp |   7 +
 .../libgomp.oacc-c-c++-common/parallel-dims.c  | 523 -
 libgomp/testsuite/libgomp.oacc-c/c.exp |   7 +
 4 files changed, 537 insertions(+), 12 deletions(-)

diff --git libgomp/testsuite/lib/libgomp.exp libgomp/testsuite/lib/libgomp.exp
index 5e47872..62ee2e3 100644
--- libgomp/testsuite/lib/libgomp.exp
+++ libgomp/testsuite/lib/libgomp.exp
@@ -358,6 +358,18 @@ proc check_effective_target_offload_device_shared_as { } {
 } ]
 }
 
+# Return 1 if configured for nvptx offloading.
+
+proc check_effective_target_openacc_nvidia_accel_configured { } {
+global offload_targets
+if { ![string match "*,nvptx,*" ",$offload_targets,"] } {
+return 0
+}
+# PR libgomp/65099: Currently, we only support offloading in 64-bit
+# configurations.
+return [is-effective-target lp64]
+}
+
 # Return 1 if at least one nvidia board is present.
 
 proc check_effective_target_openacc_nvidia_accel_present { } {
diff --git libgomp/testsuite/libgomp.oacc-c++/c++.exp libgomp/testsuite/libgomp.oacc-c++/c++.exp
index 608b298..9beadd6 100644
--- libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -4,6 +4,13 @@ load_lib libgomp-dg.exp
 load_gcc_lib gcc-dg.exp
 load_gcc_lib torture-options.exp
 
+proc check_effective_target_c { } {
+return 0
+}
+proc check_effective_target_c++ { } {
+return 1
+}
+
 global shlib_ext
 
 set shlib_ext [get_shlib_extension]
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index f5766a4..d8af546 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -1,25 +1,524 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
+   vector_length.  */
+
+#include <limits.h>
+#include <openacc.h>
+
+/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
+   not behaving as expected for -O0.  */
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
+
+#pragma acc routine seq
+static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
+{
+  if (acc_on_device ((int) acc_device_host))
+return 0;
+  else if (acc_on_device ((int) acc_device_nvidia))
+{
+  unsigned int r;
+  asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
+  return r;
+}
+  else
+__builtin_abort ();
+}
 
-/* Worker and vector size checks.  Picked an outrageously large
-   value. */
 
 int main ()
 {
-  int dummy[10];
+  acc_init (acc_device_default);
 
-#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */
+  /* Non-positive value.  */
+
+  /* GR, WS, VS.  */
+  {
+#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
+int gangs_actual = GANGS;
+int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
+gangs_min = workers_min = vectors_min = INT_MAX;
+gangs_max = workers_max = vectors_max = INT_MIN;
+#pragma acc parallel copy (gangs_actual) \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
+  num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" ""