Hi!

The spec says that a reduction clause on a combined parallel for should be
put onto the for, with the variable shared on the parallel.  This patch
handles just inscan reductions that way; doing it right for all reductions
will be slightly more work and, once it works, will even allow cleanups.
But for inscan it has to be done, because the code isn't prepared to look
for all this on another construct.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-07-03  Jakub Jelinek  <ja...@redhat.com>

        * gimplify.c (gimplify_scan_omp_clauses): For inscan reductions
        on worksharing loop propagate it as shared clause to containing
        combined parallel.

        * c-omp.c (c_omp_split_clauses): Put OMP_CLAUSE_REDUCTION_INSCAN
        clauses on OMP_FOR rather than OMP_PARALLEL when OMP_FOR is combined
        with OMP_PARALLEL.

        * c-c++-common/gomp/scan-5.c: New test.

--- gcc/gimplify.c.jj   2019-06-27 23:25:15.061064380 +0200
+++ gcc/gimplify.c      2019-07-02 07:31:29.646940498 +0200
@@ -9125,7 +9125,10 @@ gimplify_scan_omp_clauses (tree *list_p,
                          " or private in outer context", DECL_NAME (decl));
            }
        do_notice:
-         if ((region_type & ORT_TASKLOOP) == ORT_TASKLOOP
+         if (((region_type & ORT_TASKLOOP) == ORT_TASKLOOP
+              || (region_type == ORT_WORKSHARE
+                  && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+                  && OMP_CLAUSE_REDUCTION_INSCAN (c)))
              && outer_ctx
              && outer_ctx->region_type == ORT_COMBINED_PARALLEL
              && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
--- gcc/c-family/c-omp.c.jj     2019-06-25 08:59:40.037895623 +0200
+++ gcc/c-family/c-omp.c        2019-07-02 07:17:00.381811500 +0200
@@ -1634,7 +1634,8 @@ c_omp_split_clauses (location_t loc, enu
          break;
        /* Reduction is allowed on simd, for, parallel, sections, taskloop
           and teams.  Duplicate it on all of them, but omit on for or
-          sections if parallel is present.  If taskloop is combined with
+          sections if parallel is present (unless inscan, in that case
+          omit on parallel).  If taskloop is combined with
           parallel, omit it on parallel.  */
        case OMP_CLAUSE_REDUCTION:
          if (OMP_CLAUSE_REDUCTION_TASK (clauses))
@@ -1708,7 +1709,8 @@ c_omp_split_clauses (location_t loc, enu
                  s = C_OMP_CLAUSE_SPLIT_PARALLEL;
                }
              else if ((mask & (OMP_CLAUSE_MASK_1
-                               << PRAGMA_OMP_CLAUSE_NUM_THREADS)) != 0)
+                               << PRAGMA_OMP_CLAUSE_NUM_THREADS)) != 0
+                      && !OMP_CLAUSE_REDUCTION_INSCAN (clauses))
                s = C_OMP_CLAUSE_SPLIT_PARALLEL;
              else
                s = C_OMP_CLAUSE_SPLIT_FOR;
--- gcc/testsuite/c-c++-common/gomp/scan-5.c.jj 2019-07-02 07:49:14.059833063 +0200
+++ gcc/testsuite/c-c++-common/gomp/scan-5.c    2019-07-02 12:44:11.277154546 +0200
@@ -0,0 +1,13 @@
+int
+foo (int *a, int *b)
+{
+  int r = 0;
+  #pragma omp parallel for reduction (inscan, +:r) default(none) firstprivate (a, b)
+  for (int i = 0; i < 64; i++)
+    {
+      r += a[i];
+      #pragma omp scan inclusive (r)   /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */
+      b[i] = r;
+    }
+  return r;
+}

        Jakub

Reply via email to