Hi,
this patch adds a test case with a double reduction in an OpenACC kernels
region.
To get the code into a shape that parloops can handle, I needed to run
the pass_lim/pass_copy_prop sequence a second time.
Bootstrapped and reg-tested on x86_64.
Committed to gomp-4_0-branch.
Thanks,
- Tom
Handle double reduction in oacc kernels pass group
2015-07-28 Tom de Vries <t...@codesourcery.com>
* passes.def: Repeat pass_lim and pass_copy_prop in oacc kernels pass
group.
* c-c++-common/goacc/kernels-double-reduction.c: New test.
---
gcc/passes.def | 2 ++
.../c-c++-common/goacc/kernels-double-reduction.c | 37 ++++++++++++++++++++++
2 files changed, 39 insertions(+)
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
diff --git a/gcc/passes.def b/gcc/passes.def
index ae91ed1..e31e39f 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -96,6 +96,8 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_tree_loop_init);
NEXT_PASS (pass_lim);
NEXT_PASS (pass_copy_prop);
+ NEXT_PASS (pass_lim);
+ NEXT_PASS (pass_copy_prop);
NEXT_PASS (pass_scev_cprop);
NEXT_PASS (pass_parallelize_loops_oacc_kernels);
NEXT_PASS (pass_expand_omp_ssa);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
new file mode 100644
index 0000000..81467a9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -0,0 +1,37 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N 500
+
+unsigned int a[N][N];
+
+void __attribute__((noinline,noclone))
+foo (void)
+{ /* Sum every element of a[][] into sum inside an OpenACC kernels region, then check the result.  */
+ int i, j;
+ unsigned int sum = 1; /* Reduction accumulator; starts at 1, not 0.  */
+
+#pragma acc kernels copyin (a[0:N]) copy (sum)
+ {
+ for (i = 0; i < N; ++i) /* Outer loop of the doubly-nested reduction.  */
+ for (j = 0; j < N; ++j) /* Inner loop; both loops accumulate into the same sum.  */
+ sum += a[i][j];
+ }
+
+ if (sum != 5001) /* NOTE(review): a is never written in this file, so sum would presumably stay 1 -- confirm 5001 is intended.  */
+ abort ();
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
--
1.9.1