On 22/04/15 09:39, Richard Biener wrote:
Committed to gomp-4_0-branch in r222281:
>
>commit 58c33a7965c379b55b549d50e3b79b2252bcc876
>Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
>Date: Tue Apr 21 19:48:16 2015 +0000
>
> Add pass_ch_oacc_kernels to pass_oacc_kernels
>
> gcc/
> * omp-low.c (loop_in_oacc_kernels_region_p): New function.
> * omp-low.h (loop_in_oacc_kernels_region_p): Declare.
> * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
> * tree-pass.h (make_pass_ch_oacc_kernels): Declare
> * tree-ssa-loop-ch.c: Include omp-low.h.
> (pass_ch_execute): Declare.
> (pass_ch::execute): Factor out ...
> (pass_ch_execute): ... this new function. If handling oacc kernels,
> skip loops that are not in oacc kernels region.
> (pass_ch_oacc_kernels::execute):
> (pass_data_ch_oacc_kernels): New pass_data.
> (class pass_ch_oacc_kernels): New pass.
> (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New
> function.
>
> git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222281 138bc75d-0d04-0410-961f-82ee72b054a4
>---
> gcc/ChangeLog.gomp | 15 ++++++++
> gcc/omp-low.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
> gcc/omp-low.h | 2 ++
> gcc/passes.def | 1 +
> gcc/tree-pass.h | 1 +
> gcc/tree-ssa-loop-ch.c | 59 +++++++++++++++++++++++++++++--
> 6 files changed, 167 insertions(+), 2 deletions(-)
>
>diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
>index 8a53ad8..d00c5e0 100644
>--- gcc/ChangeLog.gomp
>+++ gcc/ChangeLog.gomp
>@@ -1,5 +1,20 @@
> 2015-04-21 Tom de Vries<t...@codesourcery.com>
>
>+ * omp-low.c (loop_in_oacc_kernels_region_p): New function.
>+ * omp-low.h (loop_in_oacc_kernels_region_p): Declare.
>+ * passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
>+ * tree-pass.h (make_pass_ch_oacc_kernels): Declare
>+ * tree-ssa-loop-ch.c: Include omp-low.h.
>+ (pass_ch_execute): Declare.
>+ (pass_ch::execute): Factor out ...
>+ (pass_ch_execute): ... this new function. If handling oacc kernels,
>+ skip loops that are not in oacc kernels region.
>+ (pass_ch_oacc_kernels::execute):
>+ (pass_data_ch_oacc_kernels): New pass_data.
>+ (class pass_ch_oacc_kernels): New pass.
>+ (pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New
>+ function.
>+
> * passes.def: Add pass group pass_oacc_kernels.
> * tree-pass.h (make_pass_oacc_kernels): Declare.
> * tree-ssa-loop.c (gate_oacc_kernels): New static function.
>diff --git gcc/omp-low.c gcc/omp-low.c
>index 16d9a5e..1b03ae6 100644
>--- gcc/omp-low.c
>+++ gcc/omp-low.c
>@@ -13920,4 +13920,95 @@ gimple_stmt_omp_data_i_init_p (gimple stmt)
> SSA_OP_DEF);
> }
>
>+/* Return true if LOOP is inside a kernels region. */
>+
>+bool
>+loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
>+ basic_block *region_exit)
Ehm. So why not simply add a flag to struct loop instead and set it
during OMP region parsing/lowering?
Attached patch adds an in_oacc_kernels_region flag to struct loop, and
uses it. OK for gomp-4_0-branch?
Thanks,
- Tom
Add in_oacc_kernels_region field to struct loop
2015-06-03 Tom de Vries <t...@codesourcery.com>
* cfgloop.h (struct loop): Add in_oacc_kernels_region field.
* omp-low.c (mark_loops_in_oacc_kernels_region): New function.
(loop_get_oacc_kernels_region_entry): New function.
(expand_omp_target): Call mark_loops_in_oacc_kernels_region.
(loop_in_oacc_kernels_region_p): Remove function.
* omp-low.h (loop_in_oacc_kernels_region_p): Remove declaration.
(loop_get_oacc_kernels_region_entry): Declare.
* tree-parloops.c (parallelize_loops): Use in_oacc_kernels_region field and
loop_get_oacc_kernels_region_entry.
* tree-ssa-loop-ch.c (pass_ch_execute): Use in_oacc_kernels_region field.
---
gcc/cfgloop.h | 3 +
gcc/omp-low.c | 155 ++++++++++++++++++++-----------------------------
gcc/omp-low.h | 3 +-
gcc/tree-parloops.c | 7 ++-
gcc/tree-ssa-loop-ch.c | 2 +-
5 files changed, 73 insertions(+), 97 deletions(-)
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 1d84572..a3654d9 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -195,6 +195,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
/* True if we should try harder to vectorize this loop. */
bool force_vectorize;
+ /* True if the loop is part of an oacc kernels region. */
+ bool in_oacc_kernels_region;
+
/* For SIMD loops, this is a unique identifier of the loop, referenced
by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
builtins. */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 01e5d4b..04c1981 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -9421,6 +9421,68 @@ oacc_alloc_broadcast_storage (omp_context *ctx, tree clauses)
ctx, TYPE_SIZE_UNIT (long_long_unsigned_type_node));
}
+/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
+ at REGION_EXIT. */
+
+static void
+mark_loops_in_oacc_kernels_region (basic_block region_entry,
+ basic_block region_exit)
+{
+ bitmap dominated_bitmap = BITMAP_GGC_ALLOC ();
+ bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
+ unsigned di;
+ basic_block bb;
+
+ bitmap_clear (dominated_bitmap);
+ bitmap_clear (excludes_bitmap);
+
+ /* Get all the blocks dominated by the region entry. That will include the
+ entire region. */
+ vec<basic_block> dominated
+ = get_all_dominated_blocks (CDI_DOMINATORS, region_entry);
+ FOR_EACH_VEC_ELT (dominated, di, bb)
+ bitmap_set_bit (dominated_bitmap, bb->index);
+
+ /* Exclude all the blocks which are not in the region: the blocks dominated by
+ the region exit. */
+ if (region_exit != NULL)
+ {
+ vec<basic_block> excludes
+ = get_all_dominated_blocks (CDI_DOMINATORS, region_exit);
+ FOR_EACH_VEC_ELT (excludes, di, bb)
+ bitmap_set_bit (excludes_bitmap, bb->index);
+ }
+
+ /* Mark the loops in the region. */
+ struct loop *loop;
+ FOR_EACH_LOOP (loop, 0)
+ if (bitmap_bit_p (dominated_bitmap, loop->header->index)
+ && !bitmap_bit_p (excludes_bitmap, loop->header->index))
+ loop->in_oacc_kernels_region = true;
+}
+
+/* Return the entry basic block of the oacc kernels region containing LOOP. */
+
+basic_block
+loop_get_oacc_kernels_region_entry (struct loop *loop)
+{
+ if (!loop->in_oacc_kernels_region)
+ return NULL;
+
+ basic_block bb = loop->header;
+ while (true)
+ {
+ bb = get_immediate_dominator (CDI_DOMINATORS, bb);
+ gcc_assert (bb != NULL);
+
+ gimple last = last_stmt (bb);
+ if (last != NULL
+ && gimple_code (last) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (last) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+ return bb;
+ }
+}
+
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
static void
@@ -9491,6 +9553,8 @@ expand_omp_target (struct omp_region *region)
as an optimization barrier. */
do_splitoff = false;
cfun->curr_properties &= ~PROP_gimple_eomp;
+
+ mark_loops_in_oacc_kernels_region (region->entry, region->exit);
}
else
{
@@ -15164,97 +15228,6 @@ gimple_stmt_omp_data_i_init_p (gimple stmt)
SSA_OP_DEF);
}
-/* Return true if LOOP is inside a kernels region. */
-
-bool
-loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
- basic_block *region_exit)
-{
- bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
- bitmap region_bitmap = BITMAP_GGC_ALLOC ();
- bitmap_clear (region_bitmap);
-
- if (region_entry != NULL)
- *region_entry = NULL;
- if (region_exit != NULL)
- *region_exit = NULL;
-
- basic_block bb;
- gimple last;
- FOR_EACH_BB_FN (bb, cfun)
- {
- if (bitmap_bit_p (region_bitmap, bb->index))
- continue;
-
- last = last_stmt (bb);
- if (!last)
- continue;
-
- if (gimple_code (last) != GIMPLE_OMP_TARGET
- || (gimple_omp_target_kind (last) != GF_OMP_TARGET_KIND_OACC_KERNELS))
- continue;
-
- bitmap_clear (excludes_bitmap);
- bitmap_set_bit (excludes_bitmap, bb->index);
-
- vec<basic_block> dominated
- = get_all_dominated_blocks (CDI_DOMINATORS, bb);
-
- unsigned di;
- basic_block dom;
-
- basic_block end_region = NULL;
- FOR_EACH_VEC_ELT (dominated, di, dom)
- {
- if (dom == bb)
- continue;
-
- last = last_stmt (dom);
- if (!last)
- continue;
-
- if (gimple_code (last) != GIMPLE_OMP_RETURN)
- continue;
-
- if (end_region == NULL
- || dominated_by_p (CDI_DOMINATORS, end_region, dom))
- end_region = dom;
- }
-
- if (end_region == NULL)
- {
- gimple kernels = last_stmt (bb);
- fatal_error (gimple_location (kernels),
- "End of kernel region unreachable");
- }
-
- vec<basic_block> excludes
- = get_all_dominated_blocks (CDI_DOMINATORS, end_region);
-
- unsigned di2;
- basic_block exclude;
-
- FOR_EACH_VEC_ELT (excludes, di2, exclude)
- if (exclude != end_region)
- bitmap_set_bit (excludes_bitmap, exclude->index);
-
- FOR_EACH_VEC_ELT (dominated, di, dom)
- if (!bitmap_bit_p (excludes_bitmap, dom->index))
- bitmap_set_bit (region_bitmap, dom->index);
-
- if (bitmap_bit_p (region_bitmap, loop->header->index))
- {
- if (region_entry != NULL)
- *region_entry = bb;
- if (region_exit != NULL)
- *region_exit = end_region;
- return true;
- }
- }
-
- return false;
-}
-
namespace {
const pass_data pass_data_late_lower_omp =
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index ae63c9f..fbc8416 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -29,8 +29,7 @@ extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void);
extern bool gimple_stmt_omp_data_i_init_p (gimple);
-extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *,
- basic_block *);
+extern basic_block loop_get_oacc_kernels_region_entry (struct loop *);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 72877ee..4f193e6 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2629,7 +2629,7 @@ parallelize_loops (bool oacc_kernels_p)
struct obstack parloop_obstack;
HOST_WIDE_INT estimated;
source_location loop_loc;
- basic_block region_entry, region_exit;
+ basic_block region_entry;
/* Do not parallelize loops in the functions created by parallelization. */
if (parallelized_function_p (cfun->decl))
@@ -2649,8 +2649,7 @@ parallelize_loops (bool oacc_kernels_p)
if (oacc_kernels_p)
{
- if (!loop_in_oacc_kernels_region_p (loop, &region_entry,
- &region_exit))
+ if (!loop->in_oacc_kernels_region)
continue;
/* TODO: Allow nested loops. */
@@ -2661,6 +2660,8 @@ parallelize_loops (bool oacc_kernels_p)
fprintf (dump_file,
"Trying loop %d with header bb %d in oacc kernels region\n",
loop->num, loop->header->index);
+
+ region_entry = loop_get_oacc_kernels_region_entry (loop);
}
if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 1cd77e6..7527efd 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -225,7 +225,7 @@ pass_ch_execute (function *fun, bool oacc_kernels_p)
continue;
if (oacc_kernels_p
- && !loop_in_oacc_kernels_region_p (loop, NULL, NULL))
+ && !loop->in_oacc_kernels_region)
continue;
/* Iterate the header copying up to limit; this takes care of the cases
--
1.9.1