Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Jakub Jelinek
On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:
> Richard, Jakub,
> this updates patch 1 to use the target-insns.def mechanism of detecting
> conditionally-implemented instructions.  Otherwise it's the same as
> yesterday's patch.  To recap:
> 
> 1) Moved the subcodes to an enumeration in internal-fn.h
> 
> 2) Remove ECF_LEAF
> 
> 3) Added check in initialize_ctrl_altering
> 
> 4) tracer code now (continues) to only look in last stmt of block
> 
> I looked at fnsplit and do not believe I need changes there.  That's
> changing things like:
>   if (cheap test)
> do cheap thing
>   else
> do complex thing
> 
> to break out the else part into a separate function.   That's fine -- it'll
> copy the whole CFG of interest.

The question is if some UNIQUE call could be ever considered as part of the
cheap test or do cheap thing.  If not, everything is fine of course for
fnsplit.

> ok?

Ok for me, but please wait for Richi's ack too.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Nathan Sidwell

This is the patch that was committed.

nathan
2015-10-27  Nathan Sidwell  
	
	* internal-fn.c (expand_UNIQUE): New.
	* internal-fn.h (enum ifn_unique_kind): New.
	* internal-fn.def (IFN_UNIQUE): New.
	* target-insns.def (unique): Define.
	* gimple.h (gimple_call_internal_unique_p): New.
	* gimple.c (gimple_call_same_target_p): Check internal fn
	uniqueness.
	* tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts): Likewise.
	* tree-cfg.c (gmple_call_initialize_ctrl_altering): Likewise.

Index: gcc/internal-fn.c
===
--- gcc/internal-fn.c	(revision 229443)
+++ gcc/internal-fn.c	(working copy)
@@ -1958,6 +1958,30 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNU
   gcc_unreachable ();
 }
 
+/* Expand the IFN_UNIQUE function according to its first argument.  */
+
+static void
+expand_UNIQUE (gcall *stmt)
+{
+  rtx pattern = NULL_RTX;
+  enum ifn_unique_kind kind
+= (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+  switch (kind)
+{
+default:
+  gcc_unreachable ();
+
+case IFN_UNIQUE_UNSPEC:
+  if (targetm.have_unique ())
+	pattern = targetm.gen_unique ();
+  break;
+}
+
+  if (pattern)
+emit_insn (pattern);
+}
+
 /* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
 
Index: gcc/internal-fn.h
===
--- gcc/internal-fn.h	(revision 229443)
+++ gcc/internal-fn.h	(working copy)
@@ -20,6 +20,11 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_INTERNAL_FN_H
 #define GCC_INTERNAL_FN_H
 
+/* INTEGER_CST values for IFN_UNIQUE function arg-0.  */
+enum ifn_unique_kind {
+  IFN_UNIQUE_UNSPEC   /* Undifferentiated UNIQUE.  */
+};
+
 /* Initialize internal function tables.  */
 
 extern void init_internal_fns ();
Index: gcc/internal-fn.def
===
--- gcc/internal-fn.def	(revision 229443)
+++ gcc/internal-fn.def	(working copy)
@@ -65,3 +65,10 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST
 DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
+
+/* An unduplicable, uncombinable function.  Generally used to preserve
+   a CFG property in the face of jump threading, tail merging or
+   other such optimizations.  The first argument distinguishes
+   between uses.  See internal-fn.h for usage.  */
+DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NULL)
+
Index: gcc/target-insns.def
===
--- gcc/target-insns.def	(revision 229443)
+++ gcc/target-insns.def	(working copy)
@@ -89,5 +89,6 @@ DEF_TARGET_INSN (stack_protect_test, (rt
 DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
 DEF_TARGET_INSN (trap, (void))
+DEF_TARGET_INSN (unique, (void))
 DEF_TARGET_INSN (untyped_call, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (untyped_return, (rtx x0, rtx x1))
Index: gcc/gimple.c
===
--- gcc/gimple.c	(revision 229443)
+++ gcc/gimple.c	(working copy)
@@ -1346,7 +1346,8 @@ gimple_call_same_target_p (const gimple
 {
   if (gimple_call_internal_p (c1))
 return (gimple_call_internal_p (c2)
-	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2));
+	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)
+	&& !gimple_call_internal_unique_p (as_a  (c1)));
   else
 return (gimple_call_fn (c1) == gimple_call_fn (c2)
 	|| (gimple_call_fndecl (c1)
Index: gcc/gimple.h
===
--- gcc/gimple.h	(revision 229443)
+++ gcc/gimple.h	(working copy)
@@ -2895,6 +2895,21 @@ gimple_call_internal_fn (const gimple *g
   return gimple_call_internal_fn (gc);
 }
 
+/* Return true, if this internal gimple call is unique.  */
+
+static inline bool
+gimple_call_internal_unique_p (const gcall *gs)
+{
+  return gimple_call_internal_fn (gs) == IFN_UNIQUE;
+}
+
+static inline bool
+gimple_call_internal_unique_p (const gimple *gs)
+{
+  const gcall *gc = GIMPLE_CHECK2 (gs);
+  return gimple_call_internal_unique_p (gc);
+}
+
 /* If CTRL_ALTERING_P is true, mark GIMPLE_CALL S to be a stmt
that could alter control flow.  */
 
Index: gcc/tracer.c
===
--- gcc/tracer.c	(revision 229443)
+++ gcc/tracer.c	(working copy)
@@ -93,18 +93,25 @@ bb_seen_p (basic_block bb)
 static bool
 ignore_bb_p (const_basic_block bb)
 {
-  gimple *g;
-
   if (bb->index < NUM_FIXED_BLOCKS)
 return true;
   if (optimize_bb_for_size_p (bb))
 return true;
 
-  /* A transaction is a single entry multiple exit region.  It must be
- 

Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:
> On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:
>> Richard, Jakub,
>> this updates patch 1 to use the target-insns.def mechanism of detecting
>> conditionally-implemented instructions.  Otherwise it's the same as
>> yesterday's patch.  To recap:
>>
>> 1) Moved the subcodes to an enumeration in internal-fn.h
>>
>> 2) Remove ECF_LEAF
>>
>> 3) Added check in initialize_ctrl_altering
>>
>> 4) tracer code now (continues) to only look in last stmt of block
>>
>> I looked at fnsplit and do not believe I need changes there.  That's
>> changing things like:
>>   if (cheap test)
>> do cheap thing
>>   else
>> do complex thing
>>
>> to break out the else part into a separate function.   That's fine -- it'll
>> copy the whole CFG of interest.
>
> The question is if some UNIQUE call could be ever considered as part of the
> cheap test or do cheap thing.  If not, everything is fine of course for
> fnsplit.
>
>> ok?
>
> Ok for me, but please wait for Richi's ack too.

+  /* An IFN_UNIQUE call must be duplicated as part of its group,
+or not at all.  */
+  if (is_gimple_call (g) && gimple_call_internal_p (g)
+ && gimple_call_internal_unique_p (g))

& always to the next line

Otherwise looks ok to me now.

Thanks,
Richard.

> Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Nathan Sidwell

On 10/27/15 06:45, Richard Biener wrote:

On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:



Ok for me, but please wait for Richi's ack too.


+  /* An IFN_UNIQUE call must be duplicated as part of its group,
+or not at all.  */
+  if (is_gimple_call (g) && gimple_call_internal_p (g)
+ && gimple_call_internal_unique_p (g))

& always to the next line


oh, did not know that.


Otherwise looks ok to me now.


Great thanks!

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Jakub Jelinek
On Tue, Oct 27, 2015 at 07:03:40AM -0700, Nathan Sidwell wrote:
> On 10/27/15 06:45, Richard Biener wrote:
> >On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:
> 
> >>Ok for me, but please wait for Richi's ack too.
> >
> >+  /* An IFN_UNIQUE call must be duplicated as part of its group,
> >+or not at all.  */
> >+  if (is_gimple_call (g) && gimple_call_internal_p (g)
> >+ && gimple_call_internal_unique_p (g))
> >
> >& always to the next line
> 
> oh, did not know that.

I believe the general rule is if all the conditions are short enough
that everything fits on a single line, you can write it as
  if (a && b && c && d)
but as soon as you need to wrap, it should be one && per line, so
  if (a
  && b
  && c
  && d)
style in that case rather than
  if (a && b
  && c && d)

But, lots of code doesn't do it this way.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Nathan Sidwell

On 10/27/15 01:03, Jakub Jelinek wrote:

On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:



to break out the else part into a separate function.   That's fine -- it'll
copy the whole CFG of interest.


The question is if some UNIQUE call could be ever considered as part of the
cheap test or do cheap thing.  If not, everything is fine of course for
fnsplit.


It doesn't matter (although I doubt the CFG it's attached to will be considered 
cheap) for how I'm using it.  We never generate a CFG where part of the UNIQUE 
sequence will be in the cheap thing block and another part not in the cheap 
thing  block.


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-26 Thread Nathan Sidwell

Richard, Jakub,
this updates patch 1 to use the target-insns.def mechanism of detecting 
conditionally-implemented instructions.  Otherwise it's the same as yesterday's 
patch.  To recap:


1) Moved the subcodes to an enumeration in internal-fn.h

2) Remove ECF_LEAF

3) Added check in initialize_ctrl_altering

4) tracer code now (continues) to only look in last stmt of block

I looked at fnsplit and do not believe I need changes there.  That's changing 
things like:

  if (cheap test)
do cheap thing
  else
do complex thing

to break out the else part into a separate function.   That's fine -- it'll copy 
the whole CFG of interest.


ok?

nathan
2015-10-26  Nathan Sidwell  
	
	* internal-fn.c (expand_UNIQUE): New.
	* internal-fn.h (enum ifn_unique_kind): New.
	* internal-fn.def (IFN_UNIQUE): New.
	* target-insns.def (unique): Define.
	* gimple.h (gimple_call_internal_unique_p): New.
	* gimple.c (gimple_call_same_target_p): Check internal fn
	uniqueness.
	* tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts): Likewise.
	* tree-cfg.c (gmple_call_initialize_ctrl_altering): Likewise.

Index: gcc/target-insns.def
===
--- gcc/target-insns.def	(revision 229276)
+++ gcc/target-insns.def	(working copy)
@@ -89,5 +93,6 @@ DEF_TARGET_INSN (stack_protect_test, (rt
 DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
 DEF_TARGET_INSN (trap, (void))
+DEF_TARGET_INSN (unique, (void))
 DEF_TARGET_INSN (untyped_call, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (untyped_return, (rtx x0, rtx x1))
Index: gcc/gimple.c
===
--- gcc/gimple.c	(revision 229276)
+++ gcc/gimple.c	(working copy)
@@ -1346,7 +1346,8 @@ gimple_call_same_target_p (const gimple
 {
   if (gimple_call_internal_p (c1))
 return (gimple_call_internal_p (c2)
-	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2));
+	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)
+	&& !gimple_call_internal_unique_p (as_a  (c1)));
   else
 return (gimple_call_fn (c1) == gimple_call_fn (c2)
 	|| (gimple_call_fndecl (c1)
Index: gcc/gimple.h
===
--- gcc/gimple.h	(revision 229276)
+++ gcc/gimple.h	(working copy)
@@ -2895,6 +2895,21 @@ gimple_call_internal_fn (const gimple *g
   return gimple_call_internal_fn (gc);
 }
 
+/* Return true, if this internal gimple call is unique.  */
+
+static inline bool
+gimple_call_internal_unique_p (const gcall *gs)
+{
+  return gimple_call_internal_fn (gs) == IFN_UNIQUE;
+}
+
+static inline bool
+gimple_call_internal_unique_p (const gimple *gs)
+{
+  const gcall *gc = GIMPLE_CHECK2 (gs);
+  return gimple_call_internal_unique_p (gc);
+}
+
 /* If CTRL_ALTERING_P is true, mark GIMPLE_CALL S to be a stmt
that could alter control flow.  */
 
Index: gcc/internal-fn.c
===
--- gcc/internal-fn.c	(revision 229276)
+++ gcc/internal-fn.c	(working copy)
@@ -1958,6 +1958,30 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNU
   gcc_unreachable ();
 }
 
+/* Expand the IFN_UNIQUE function according to its first argument.  */
+
+static void
+expand_UNIQUE (gcall *stmt)
+{
+  rtx pattern = NULL_RTX;
+  enum ifn_unique_kind kind
+= (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+  switch (kind)
+{
+default:
+  gcc_unreachable ();
+
+case IFN_UNIQUE_UNSPEC:
+  if (targetm.have_unique ())
+	pattern = targetm.gen_unique ();
+  break;
+}
+
+  if (pattern)
+emit_insn (pattern);
+}
+
 /* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
 
Index: gcc/internal-fn.h
===
--- gcc/internal-fn.h	(revision 229276)
+++ gcc/internal-fn.h	(working copy)
@@ -20,6 +20,11 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_INTERNAL_FN_H
 #define GCC_INTERNAL_FN_H
 
+/* INTEGER_CST values for IFN_UNIQUE function arg-0.  */
+enum ifn_unique_kind {
+  IFN_UNIQUE_UNSPEC   /* Undifferentiated UNIQUE.  */
+};
+
 /* Initialize internal function tables.  */
 
 extern void init_internal_fns ();
Index: gcc/internal-fn.def
===
--- gcc/internal-fn.def	(revision 229276)
+++ gcc/internal-fn.def	(working copy)
@@ -65,3 +65,10 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST
 DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
+
+/* An unduplicable, uncombinable function.  Generally used to preserve
+   a CFG property in the face of jump threading, tail merging or
+  

Re: [OpenACC 1/11] UNIQUE internal function

2015-10-25 Thread Nathan Sidwell

Richard, Jakub,
here is an updated patch.  Changes from previous version

1) Moved the subcodes to an enumeration in internal-fn.h

2) Remove ECF_LEAF

3) Added check in initialize_ctrl_altering

4) tracer code now (continues) to only look in last stmt of block

I looked at fnsplit and do not believe I need changes there.  That's changing 
things like:

  if (cheap test)
do cheap thing
  else
do complex thing

to break out the else part into a separate function.   That's fine -- it'll copy 
the whole CFG of interest.


I'll  be posting an updated 7/11 patch shortly.

comments?

nathan
2015-10-25  Nathan Sidwell  
	
	* internal-fn.c (expand_UNIQUE): New.
	* internal-fn.h (enum ifn_unique_kind): New.
	* internal-fn.def (IFN_UNIQUE): New.
	* gimple.h (gimple_call_internal_unique_p): New.
	* gimple.c (gimple_call_same_target_p): Check internal fn
	uniqueness.
	* tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts): Likewise.
	* tree-cfg.c (gmple_call_initialize_ctrl_altering): Likewise.

Index: gcc/tree-ssa-threadedge.c
===
--- gcc/tree-ssa-threadedge.c	(revision 229276)
+++ gcc/tree-ssa-threadedge.c	(working copy)
@@ -283,6 +283,17 @@ record_temporary_equivalences_from_stmts
 	  && gimple_asm_volatile_p (as_a  (stmt)))
 	return NULL;
 
+  /* If the statement is a unique builtin, we can not thread
+	 through here.  */
+  if (gimple_code (stmt) == GIMPLE_CALL)
+	{
+	  gcall *call = as_a  (stmt);
+
+	  if (gimple_call_internal_p (call)
+	  && gimple_call_internal_unique_p (call))
+	return NULL;
+	}
+
   /* If duplicating this block is going to cause too much code
 	 expansion, then do not thread through this block.  */
   stmt_count++;
Index: gcc/internal-fn.def
===
--- gcc/internal-fn.def	(revision 229276)
+++ gcc/internal-fn.def	(working copy)
@@ -65,3 +65,10 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST
 DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
+
+/* An unduplicable, uncombinable function.  Generally used to preserve
+   a CFG property in the face of jump threading, tail merging or
+   other such optimizations.  The first argument distinguishes
+   between uses. See internal-fn.h for usage.  */
+DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NULL)
Index: gcc/gimple.c
===
--- gcc/gimple.c	(revision 229276)
+++ gcc/gimple.c	(working copy)
@@ -1346,7 +1346,8 @@ gimple_call_same_target_p (const gimple
 {
   if (gimple_call_internal_p (c1))
 return (gimple_call_internal_p (c2)
-	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2));
+	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)
+	&& !gimple_call_internal_unique_p (as_a  (c1)));
   else
 return (gimple_call_fn (c1) == gimple_call_fn (c2)
 	|| (gimple_call_fndecl (c1)
Index: gcc/gimple.h
===
--- gcc/gimple.h	(revision 229276)
+++ gcc/gimple.h	(working copy)
@@ -2895,6 +2895,21 @@ gimple_call_internal_fn (const gimple *g
   return gimple_call_internal_fn (gc);
 }
 
+/* Return true, if this internal gimple call is unique.  */
+
+static inline bool
+gimple_call_internal_unique_p (const gcall *gs)
+{
+  return gimple_call_internal_fn (gs) == IFN_UNIQUE;
+}
+
+static inline bool
+gimple_call_internal_unique_p (const gimple *gs)
+{
+  const gcall *gc = GIMPLE_CHECK2 (gs);
+  return gimple_call_internal_unique_p (gc);
+}
+
 /* If CTRL_ALTERING_P is true, mark GIMPLE_CALL S to be a stmt
that could alter control flow.  */
 
Index: gcc/internal-fn.c
===
--- gcc/internal-fn.c	(revision 229276)
+++ gcc/internal-fn.c	(working copy)
@@ -1958,6 +1958,30 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNU
   gcc_unreachable ();
 }
 
+/* Expand the IFN_UNIQUE function according to its first argument.  */
+
+static void
+expand_UNIQUE (gcall *stmt)
+{
+  rtx pattern = NULL_RTX;
+  int code = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+  switch (code)
+{
+default:
+  gcc_unreachable ();
+
+case IFN_UNIQUE_UNSPEC:
+#ifdef HAVE_unique
+  pattern = gen_unique ();
+#endif
+  break;
+}
+
+  if (pattern)
+emit_insn (pattern);
+}
+
 /* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
 
Index: gcc/internal-fn.h
===
--- gcc/internal-fn.h	(revision 229276)
+++ gcc/internal-fn.h	(working copy)
@@ -20,6 +20,11 @@ along with GCC; see the file COPYING3.
 #ifndef GCC_INTERNAL_FN_H
 #define 

Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Richard Biener
On Thu, Oct 22, 2015 at 8:06 PM, Nathan Sidwell  wrote:
> On 10/22/15 10:26, Richard Biener wrote:
>>
>> On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:
>>>
>>> On 10/22/15 04:07, Richard Biener wrote:
>>>
 Yeah, please make them either end or start a BB so we have to check
 at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
 it also makes it a code motion barrier.
>>>
>>>
>>>
>>> Just so I'm clear, you're not saying that RETURNS_TWICE will stop the
>>> call
>>> being duplicated though?
>>
>>
>> It will in practice.  RETURNS_TWICE will get you an abnormal edge from
>> entry (I think)
>
>
> Won't that interfere with the OMP  machinery, which expects correctly nested
> loops?  (no in-to or out-of loop jumps)

Probably yes.

> nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Nathan Sidwell

On 10/23/15 04:25, Jakub Jelinek wrote:

On Thu, Oct 22, 2015 at 02:06:54PM -0400, Nathan Sidwell wrote:

On 10/22/15 10:26, Richard Biener wrote:

On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:

On 10/22/15 04:07, Richard Biener wrote:


Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.



Just so I'm clear, you're not saying that RETURNS_TWICE will stop the call
being duplicated though?


It will in practice.  RETURNS_TWICE will get you an abnormal edge from
entry (I think)


Won't that interfere with the OMP  machinery, which expects correctly nested
loops?  (no in-to or out-of loop jumps)


I bet it will, the region with the abnormal edges is no longer SESE.


Hm, it seems like a bad plan to try RETURNS_TWICE then.



If you want to force end of a BB after the IFN_UNIQUE call, then you can just
gimple_call_set_ctrl_altering (gcall, true);
on it, and probably tweak gimple_call_initialize_ctrl_altering
so that it does that by default.  Plus of course split the blocks after it
when you emit it.


IIUC this won't require RETURNS_TWICE, correct?  We're generate these seqs to a 
gimple sequence that eventually gets attached to the graph at the end of lower 
omp_for with:


  gimple_bind_set_body (new_stmt, body);
  gimple_omp_set_body (stmt, NULL);
  gimple_omp_for_set_pre_body (stmt, NULL);

Presumably that sequence will have to be split in the manner you describe 
somewhere else.  Not sure where that might be?


Any thoughts on the approach of adding a flag to struct function, and having 
tracer to skip such functions?


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Nathan Sidwell

On 10/23/15 09:03, Richard Biener wrote:


It's a hack.  I don't like hacks.


One person's hack can be another person's pragmatism :)


 I think the requirement "don't duplicate me"
but inlining is ok is somewhat broken.


The requirement is that the SESE region formed by the markers remains as an SESE 
region with those markers as the entry & exit paths. We don't have a way of 
expressing exactly that in the compiler.  What we do have is the ability to say 
'don't duplicate this insn'.



The requirement seems to be
sth like the "important" paris of such functions need to dominate/post-dominate
each other (technically not even in the same function)?


You're correct that the SESE region could be split across a function boundary in 
the manner you describe, but the  complexity of dealing with that in the 
backend's partitioning code would be high.  Let's not try and enable that from 
the get-go.


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Richard Biener
On Fri, Oct 23, 2015 at 2:57 PM, Nathan Sidwell  wrote:
> On 10/23/15 04:25, Jakub Jelinek wrote:
>>
>> On Thu, Oct 22, 2015 at 02:06:54PM -0400, Nathan Sidwell wrote:
>>>
>>> On 10/22/15 10:26, Richard Biener wrote:

 On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:
>
> On 10/22/15 04:07, Richard Biener wrote:
>
>> Yeah, please make them either end or start a BB so we have to check
>> at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
>> it also makes it a code motion barrier.
>
>
>
> Just so I'm clear, you're not saying that RETURNS_TWICE will stop the
> call
> being duplicated though?


 It will in practice.  RETURNS_TWICE will get you an abnormal edge from
 entry (I think)
>>>
>>>
>>> Won't that interfere with the OMP  machinery, which expects correctly
>>> nested
>>> loops?  (no in-to or out-of loop jumps)
>>
>>
>> I bet it will, the region with the abnormal edges is no longer SESE.
>
>
> Hm, it seems like a bad plan to try RETURNS_TWICE then.
>
>
>> If you want to force end of a BB after the IFN_UNIQUE call, then you can
>> just
>> gimple_call_set_ctrl_altering (gcall, true);
>> on it, and probably tweak gimple_call_initialize_ctrl_altering
>> so that it does that by default.  Plus of course split the blocks after it
>> when you emit it.
>
>
> IIUC this won't require RETURNS_TWICE, correct?  We're generate these seqs
> to a gimple sequence that eventually gets attached to the graph at the end
> of lower omp_for with:
>
>   gimple_bind_set_body (new_stmt, body);
>   gimple_omp_set_body (stmt, NULL);
>   gimple_omp_for_set_pre_body (stmt, NULL);
>
> Presumably that sequence will have to be split in the manner you describe
> somewhere else.  Not sure where that might be?
>
> Any thoughts on the approach of adding a flag to struct function, and having
> tracer to skip such functions?

It's a hack.  I don't like hacks.  I think the requirement "don't duplicate me"
but inlining is ok is somewhat broken.  The requirement seems to be
sth like the "important" paris of such functions need to dominate/post-dominate
each other (technically not even in the same function)?

Richard.

> nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Jakub Jelinek
On Fri, Oct 23, 2015 at 08:57:17AM -0400, Nathan Sidwell wrote:
> >If you want to force end of a BB after the IFN_UNIQUE call, then you can just
> >gimple_call_set_ctrl_altering (gcall, true);
> >on it, and probably tweak gimple_call_initialize_ctrl_altering
> >so that it does that by default.  Plus of course split the blocks after it
> >when you emit it.
> 
> IIUC this won't require RETURNS_TWICE, correct?  We're generate these seqs

It doesn't require that, sure.

> to a gimple sequence that eventually gets attached to the graph at the end
> of lower omp_for with:
> 
>   gimple_bind_set_body (new_stmt, body);
>   gimple_omp_set_body (stmt, NULL);
>   gimple_omp_for_set_pre_body (stmt, NULL);
> 
> Presumably that sequence will have to be split in the manner you describe
> somewhere else.  Not sure where that might be?

If this is during the omplower pass, then it is before cfg pass and
therefore all you need is tweak the gimple_call_initialize_ctrl_altering
function and the cfg pass will DTRT.

> Any thoughts on the approach of adding a flag to struct function, and having
> tracer to skip such functions?

It could still be expensive if functions with that flag set contain very
large basic blocks.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Nathan Sidwell

On 10/23/15 09:03, Jakub Jelinek wrote:

On Fri, Oct 23, 2015 at 08:57:17AM -0400, Nathan Sidwell wrote:



If this is during the omplower pass, then it is before cfg pass and
therefore all you need is tweak the gimple_call_initialize_ctrl_altering
function and the cfg pass will DTRT.


ok, thanks

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Jakub Jelinek
On Fri, Oct 23, 2015 at 09:13:43AM -0400, Nathan Sidwell wrote:
> You're correct that the SESE region could be split across a function
> boundary in the manner you describe, but the  complexity of dealing with
> that in the backend's partitioning code would be high.  Let's not try and
> enable that from the get-go.

Sure, but then you probably need to tweak the fnsplit pass to guarantee
that.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Nathan Sidwell

On 10/23/15 09:16, Jakub Jelinek wrote:

On Fri, Oct 23, 2015 at 09:13:43AM -0400, Nathan Sidwell wrote:

You're correct that the SESE region could be split across a function
boundary in the manner you describe, but the  complexity of dealing with
that in the backend's partitioning code would be high.  Let's not try and
enable that from the get-go.


Sure, but then you probably need to tweak the fnsplit pass to guarantee
that.


Ok, I'll take a look at that too.

The gimple_call_set_ctrl_altering approach is looking good for the moment.

Richard, if that works out, so we only have to check unique_p on the last insn 
of a bb, does that satisfy your concerns?  (Of course I'll repost patch 1 for 
review).


WRT the other patches I think the status is:

01-trunk-unique.patch
  Internal function with a 'uniqueness' property
  * reworking as described.
02-trunk-nvptx-partition.patch
  NVPTX backend patch set for partitioned execution
  * approved with minor edits
03-trunk-hook.patch
  OpenACC hook
  * approved with minor edit
04-trunk-c.patch
  C FE changes
  * Being addressed by Cesar
05-trunk-cxx.patch
  C++ FE changes
  * Being addressed by Cesar
06-trunk-red-init.patch
  Placeholder to keep reductions functioning
  * Approved
07-trunk-loop-mark.patch
  Annotate OpenACC loops in device-agnostic manner
  * Addressing minor comments
08-trunk-dev-lower.patch
  Device-specific lowering of loop markers
  * Question asked & answered about non-ptx behaviour
09-trunk-lower-gate.patch
  Run oacc_device_lower pass regardless of errors
  * Approved
10-trunk-libgomp.patch
  Libgomp change (remove dimension check)
  * Approved
11-trunk-tests.patch
  Initial set of execution tests
  * Approved, but C& C++ error tests needed

I'll repost:
01-trunk-unique.patch
  Internal function with a 'uniqueness' property

That has some obvious knock on changes to 02, 07 and 08, do you want those 
reposted for review?


Cesar will repost:
04-trunk-c.patch
  C FE changes
05-trunk-cxx.patch
  C++ FE changes

The remaining patch:
08-trunk-dev-lower.patch
  Device-specific lowering of loop markers

seems to be waiting on Jakub?

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Jakub Jelinek
On Thu, Oct 22, 2015 at 04:17:32PM -0400, Nathan Sidwell wrote:
> On 10/22/15 04:04, Jakub Jelinek wrote:
> 
> >>+  /* Ignore blocks containing non-clonable function calls.  */
> >>+  for (gsi = gsi_start_bb (CONST_CAST_BB (bb));
> >>+   !gsi_end_p (gsi); gsi_next ())
> >>+{
> >>+  g = gsi_stmt (gsi);
> >>+
> >>+  if (is_gimple_call (g) && gimple_call_internal_p (g)
> >>+ && gimple_call_internal_unique_p (as_a  (g)))
> >>+   return true;
> >>+}
> >
> >Do you have to scan the whole bb?  E.g. don't or should not those
> >unique IFNs force end of bb?
> 
> What about adding a flag to struct function?
> 
>   /* Nonzero if this function contains IFN_UNIQUE markers.  */
>   unsigned int has_unique_calls : 1;
> 
> Then the tracer could either skip it, or do the search?
> 
> (I notice there are cilk flags already in struct function, instead of the
> above, we could add an openacc-specific one with  a similar behaviour?)

If you want to force end of a BB after the IFN_UNIQUE call, then you can just
gimple_call_set_ctrl_altering (gcall, true);
on it, and probably tweak gimple_call_initialize_ctrl_altering
so that it does that by default.  Plus of course split the blocks after it
when you emit it.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-23 Thread Jakub Jelinek
On Thu, Oct 22, 2015 at 02:06:54PM -0400, Nathan Sidwell wrote:
> On 10/22/15 10:26, Richard Biener wrote:
> >On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:
> >>On 10/22/15 04:07, Richard Biener wrote:
> >>
> >>>Yeah, please make them either end or start a BB so we have to check
> >>>at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
> >>>it also makes it a code motion barrier.
> >>
> >>
> >>Just so I'm clear, you're not saying that RETURNS_TWICE will stop the call
> >>being duplicated though?
> >
> >It will in practice.  RETURNS_TWICE will get you an abnormal edge from
> >entry (I think)
> 
> Won't that interfere with the OMP  machinery, which expects correctly nested
> loops?  (no in-to or out-of loop jumps)

I bet it will, the region with the abnormal edges is no longer SESE.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 10:26, Richard Biener wrote:

On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:

On 10/22/15 04:07, Richard Biener wrote:


Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.



Just so I'm clear, you're not saying that RETURNS_TWICE will stop the call
being duplicated though?


It will in practice.  RETURNS_TWICE will get you an abnormal edge from
entry (I think)


Won't that interfere with the OMP  machinery, which expects correctly nested 
loops?  (no in-to or out-of loop jumps)


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Jakub Jelinek
On Thu, Oct 22, 2015 at 09:49:29AM +0200, Richard Biener wrote:
> >> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal
> >> fns.  This replaces that scheme.
> >>
> >> ok?
> >
> > Hmm, I'd just have used gimple_has_volatile_ops on the call?  That
> > should have the
> > desired effects.
> 
> That is, whatever new IFNs you need are ok, but special-casing them is not
> necessary if you properly mark the calls as volatile.

I don't see gimple_has_volatile_ops used in tracer.c or
tree-ssa-threadedge.c.  Setting gimple_has_volatile_ops on those IFNs is
fine, but I think they are even stronger than that.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Thu, Oct 22, 2015 at 10:04 AM, Jakub Jelinek  wrote:
> On Wed, Oct 21, 2015 at 03:00:47PM -0400, Nathan Sidwell wrote:
>> To distinguish different uses of the UNIQUE function, I use the first
>> argument, which is expected to be an INTEGER_CST.  I figured this better
>> than using multiple new internal fns, all with the unique property, as the
>> latter would need (at least) a range check in gimple_call_internal_unique_p
>> rather than a simple equality.
>>
>> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal
>> fns.  This replaces that scheme.
>>
>> ok?
>>
>> nathan
>
>> 2015-10-20  Nathan Sidwell  
>>   Cesar Philippidis  
>>
>>   * internal-fn.c (expand_UNIQUE): New.
>>   * internal-fn.def (IFN_UNIQUE): New.
>>   (IFN_UNIQUE_UNSPEC): Define.
>>   * gimple.h (gimple_call_internal_unique_p): New.
>>   * gimple.c (gimple_call_same_target_p): Check internal fn
>>   uniqueness.
>>   * tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
>>   * tree-ssa-threadedge.c
>>   (record_temporary_equivalences_from_stmts): Likewise.
>
> This is generally fine with me, but please work with Richi to find
> something acceptable to him too.
>
>> +DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | ECF_LEAF, NULL)
>
> Are you sure about the ECF_LEAF?  I mean, while the function can't
> call back to your code, I'd expect you want it as kind of strong
> optimization barrier too.
>
>> +#define IFN_UNIQUE_UNSPEC 0  /* Undifferentiated UNIQUE.  */
>> Index: tracer.c
>> ===
>> --- tracer.c  (revision 229096)
>> +++ tracer.c  (working copy)
>> @@ -93,6 +93,7 @@ bb_seen_p (basic_block bb)
>>  static bool
>>  ignore_bb_p (const_basic_block bb)
>>  {
>> +  gimple_stmt_iterator gsi;
>>gimple *g;
>>
>>if (bb->index < NUM_FIXED_BLOCKS)
>> @@ -106,6 +107,17 @@ ignore_bb_p (const_basic_block bb)
>>if (g && gimple_code (g) == GIMPLE_TRANSACTION)
>>  return true;
>>
>> +  /* Ignore blocks containing non-clonable function calls.  */
>> +  for (gsi = gsi_start_bb (CONST_CAST_BB (bb));
>> +   !gsi_end_p (gsi); gsi_next ())
>> +{
>> +  g = gsi_stmt (gsi);
>> +
>> +  if (is_gimple_call (g) && gimple_call_internal_p (g)
>> +   && gimple_call_internal_unique_p (as_a  (g)))
>> + return true;
>> +}
>
> Do you have to scan the whole bb?  E.g. don't or should not those
> unique IFNs force end of bb?

Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.

Richard.

> Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Jakub Jelinek
On Wed, Oct 21, 2015 at 03:00:47PM -0400, Nathan Sidwell wrote:
> To distinguish different uses of the UNIQUE function, I use the first
> argument, which is expected to be an INTEGER_CST.  I figured this better
> than using multiple new internal fns, all with the unique property, as the
> latter would need (at least) a range check in gimple_call_internal_unique_p
> rather than a simple equality.
> 
> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal
> fns.  This replaces that scheme.
> 
> ok?
> 
> nathan

> 2015-10-20  Nathan Sidwell  
>   Cesar Philippidis  
>   
>   * internal-fn.c (expand_UNIQUE): New.
>   * internal-fn.def (IFN_UNIQUE): New.
>   (IFN_UNIQUE_UNSPEC): Define.
>   * gimple.h (gimple_call_internal_unique_p): New.
>   * gimple.c (gimple_call_same_target_p): Check internal fn
>   uniqueness.
>   * tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
>   * tree-ssa-threadedge.c
>   (record_temporary_equivalences_from_stmts): Likewise.

This is generally fine with me, but please work with Richi to find
something acceptable to him too.

> +DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | ECF_LEAF, NULL)

Are you sure about the ECF_LEAF?  I mean, while the function can't
call back to your code, I'd expect you want it as kind of strong
optimization barrier too.

> +#define IFN_UNIQUE_UNSPEC 0  /* Undifferentiated UNIQUE.  */
> Index: tracer.c
> ===
> --- tracer.c  (revision 229096)
> +++ tracer.c  (working copy)
> @@ -93,6 +93,7 @@ bb_seen_p (basic_block bb)
>  static bool
>  ignore_bb_p (const_basic_block bb)
>  {
> +  gimple_stmt_iterator gsi;
>gimple *g;
>  
>if (bb->index < NUM_FIXED_BLOCKS)
> @@ -106,6 +107,17 @@ ignore_bb_p (const_basic_block bb)
>if (g && gimple_code (g) == GIMPLE_TRANSACTION)
>  return true;
>  
> +  /* Ignore blocks containing non-clonable function calls.  */
> +  for (gsi = gsi_start_bb (CONST_CAST_BB (bb));
> +   !gsi_end_p (gsi); gsi_next ())
> +{
> +  g = gsi_stmt (gsi);
> +
> +  if (is_gimple_call (g) && gimple_call_internal_p (g)
> +   && gimple_call_internal_unique_p (as_a  (g)))
> + return true;
> +}

Do you have to scan the whole bb?  E.g. don't or should not those
unique IFNs force end of bb?

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Thu, Oct 22, 2015 at 9:59 AM, Jakub Jelinek  wrote:
> On Thu, Oct 22, 2015 at 09:49:29AM +0200, Richard Biener wrote:
>> >> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct 
>> >> internal
>> >> fns.  This replaces that scheme.
>> >>
>> >> ok?
>> >
>> > Hmm, I'd just have used gimple_has_volatile_ops on the call?  That
>> > should have the
>> > desired effects.
>>
>> That is, whatever new IFNs you need are ok, but special-casing them is not
>> necessary if you properly mark the calls as volatile.
>
> I don't see gimple_has_volatile_ops used in tracer.c or
> tree-ssa-threadedge.c.  Setting gimple_has_volatile_ops on those IFNs is
> fine, but I think they are even stronger than that.

Hmm, indeed.  Now I fail to see how the implemented property "preserves
the CFG looping structure".  And I would have expected can_copy_bbs_p
to be adjusted instead (catching more cases and the threading and tracer
case as well).

As far as I can see nothing would prevent dissolving the loop by completely
unolling it for example.  Or deleting it because it has no side-effects.

So you'd need to be more precise as to what properties you are trying to
preserve by placing a single stmt somewhere.

Richard.

> Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 04:07, Richard Biener wrote:


Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.


I'm having a hard time making  UNIQUE the end of a BB.

I'm emitting code to a gimple sequence, which later gets processed by the OMP 
machinery.  Just doing that doesn't cause the block to be split after the 
ECF_RETURNS_TWICE function.


In all the below, the label is generated by:

  tree label = create_artificial_label (loc);
  FORCED_LABEL (label) = 1;

I tried doing
  UNIQUE (...)
  goto label
  label:

but that is apparently optimized away, leaving UNIQUE in the middle of a bb. 
Next I tried:


  UNIQUE (..., )
  goto label
label:

but the goto is elided and label migrates to the start of the bb, again leaving 
UNIQUE in the middle.


Any suggestions?

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Thu, Oct 22, 2015 at 9:48 AM, Richard Biener
 wrote:
> On Wed, Oct 21, 2015 at 9:00 PM, Nathan Sidwell  wrote:
>> This patch implements a new internal function that has a 'uniqueness'
>> property.   Jump-threading cannot clone it and tail-merging cannot combine
>> multiple instances.
>>
>> The uniqueness is implemented by a new gimple fn,
>> gimple_call_internal_unique_p.  Routines that check for identical or
>> cloneable calls are augmented to check this property.  These are:
>>
>> * tree-ssa-threadedge, which is figuring out if jump threading is a win.
>> Jump threading is inhibited.
>>
>> * gimple_call_same_target_p, used for tail merging and similar transforms.
>> Two calls of IFN_UNIQUE will never be  the same target.
>>
>> * tracer.c, which is determining whether to clone a region.
>>
>> Interestingly jump threading avoids cloning volatile asms (which it admits
>> is conservatively safe), but the tracer does not. I wonder if there's a
>> latent problem in tracer?
>>
>> The reason I needed a function with this property is to  preserve the
>> looping structure of a function's CFG.  As mentioned in the intro, we mark
>> up loops (using this builtin), so the example I gave has the following
>> inserts:
>>
>> #pragma acc parallel ...
>> {
>>  // single mode here
>> #pragma acc loop ...
>> IFN_UNIQUE (FORKING  ...)
>> for (i = 0; i < N; i++) // loop 1
>>   ... // partitioned mode here
>> IFN_UNIQUE (JOINING ...)
>>
>> if (expr) // single mode here
>> #pragma acc loop ...
>>   IFN_UNIQUE (FORKING ...)
>>   for (i = 0; i < N; i++) // loop 2
>> ... // partitioned mode here
>>   IFN_UNIQUE (JOINING ...)
>> }
>>
>> The properly nested loop property of the CFG is preserved through the
>> compilation.  This is important as (a) it allows later passes to reconstruct
>> this looping structure and (b) hardware constraints require a partioned
>> region end for all partitioned threads at a single instruction.
>>
>> Until I added this unique property, original bring-up  of partitioned
>> execution would hit cases of split loops ending in multiple cloned JOINING
>> markers and similar cases.
>>
>> To distinguish different uses of the UNIQUE function, I use the first
>> argument, which is expected to be an INTEGER_CST.  I figured this better
>> than using multiple new internal fns, all with the unique property, as the
>> latter would need (at least) a range check in gimple_call_internal_unique_p
>> rather than a simple equality.
>>
>> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal
>> fns.  This replaces that scheme.
>>
>> ok?
>
> Hmm, I'd just have used gimple_has_volatile_ops on the call?  That
> should have the
> desired effects.

That is, whatever new IFNs you need are ok, but special-casing them is not
necessary if you properly mark the calls as volatile.

Richard.

> Richard.
>
>> nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Wed, Oct 21, 2015 at 9:00 PM, Nathan Sidwell  wrote:
> This patch implements a new internal function that has a 'uniqueness'
> property.   Jump-threading cannot clone it and tail-merging cannot combine
> multiple instances.
>
> The uniqueness is implemented by a new gimple fn,
> gimple_call_internal_unique_p.  Routines that check for identical or
> cloneable calls are augmented to check this property.  These are:
>
> * tree-ssa-threadedge, which is figuring out if jump threading is a win.
> Jump threading is inhibited.
>
> * gimple_call_same_target_p, used for tail merging and similar transforms.
> Two calls of IFN_UNIQUE will never be  the same target.
>
> * tracer.c, which is determining whether to clone a region.
>
> Interestingly jump threading avoids cloning volatile asms (which it admits
> is conservatively safe), but the tracer does not. I wonder if there's a
> latent problem in tracer?
>
> The reason I needed a function with this property is to  preserve the
> looping structure of a function's CFG.  As mentioned in the intro, we mark
> up loops (using this builtin), so the example I gave has the following
> inserts:
>
> #pragma acc parallel ...
> {
>  // single mode here
> #pragma acc loop ...
> IFN_UNIQUE (FORKING  ...)
> for (i = 0; i < N; i++) // loop 1
>   ... // partitioned mode here
> IFN_UNIQUE (JOINING ...)
>
> if (expr) // single mode here
> #pragma acc loop ...
>   IFN_UNIQUE (FORKING ...)
>   for (i = 0; i < N; i++) // loop 2
> ... // partitioned mode here
>   IFN_UNIQUE (JOINING ...)
> }
>
> The properly nested loop property of the CFG is preserved through the
> compilation.  This is important as (a) it allows later passes to reconstruct
> this looping structure and (b) hardware constraints require a partioned
> region end for all partitioned threads at a single instruction.
>
> Until I added this unique property, original bring-up  of partitioned
> execution would hit cases of split loops ending in multiple cloned JOINING
> markers and similar cases.
>
> To distinguish different uses of the UNIQUE function, I use the first
> argument, which is expected to be an INTEGER_CST.  I figured this better
> than using multiple new internal fns, all with the unique property, as the
> latter would need (at least) a range check in gimple_call_internal_unique_p
> rather than a simple equality.
>
> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal
> fns.  This replaces that scheme.
>
> ok?

Hmm, I'd just have used gimple_has_volatile_ops on the call?  That
should have the
desired effects.

Richard.

> nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Julian Brown
On Thu, 22 Oct 2015 10:05:30 +0200
Richard Biener  wrote:

> On Thu, Oct 22, 2015 at 9:59 AM, Jakub Jelinek 
> wrote:
> > On Thu, Oct 22, 2015 at 09:49:29AM +0200, Richard Biener wrote:  
> >> >> Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such
> >> >> distinct internal fns.  This replaces that scheme.
> >> >>
> >> >> ok?  
> >> >
> >> > Hmm, I'd just have used gimple_has_volatile_ops on the call?
> >> > That should have the
> >> > desired effects.  
> >>
> >> That is, whatever new IFNs you need are ok, but special-casing
> >> them is not necessary if you properly mark the calls as volatile.  
> >
> > I don't see gimple_has_volatile_ops used in tracer.c or
> > tree-ssa-threadedge.c.  Setting gimple_has_volatile_ops on those
> > IFNs is fine, but I think they are even stronger than that.  
> 
> Hmm, indeed.  Now I fail to see how the implemented property
> "preserves the CFG looping structure".  And I would have expected
> can_copy_bbs_p to be adjusted instead (catching more cases and the
> threading and tracer case as well).
> 
> As far as I can see nothing would prevent dissolving the loop by
> completely unolling it for example.  Or deleting it because it has no
> side-effects.
> 
> So you'd need to be more precise as to what properties you are trying
> to preserve by placing a single stmt somewhere.

FWIW an earlier, abandoned attempt at solving the same problem was
discussed in the following thread, continuing through June:

  https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02647.html

Though the details of lowering of OpenACC constructs have changed with
Nathan's current patches, the underlying problem remains the same. PTX
requires certain operations (bar.sync) to be executed uniformly by all
threads in a CTA. IIUC this affects "JOIN" points across all
workers/vectors in a gang, in particular (though this is generic code,
other -- particularly GPU -- targets may have similar restrictions).

HTH,

Julian


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 04:04, Jakub Jelinek wrote:


+  /* Ignore blocks containing non-clonable function calls.  */
+  for (gsi = gsi_start_bb (CONST_CAST_BB (bb));
+   !gsi_end_p (gsi); gsi_next ())
+{
+  g = gsi_stmt (gsi);
+
+  if (is_gimple_call (g) && gimple_call_internal_p (g)
+ && gimple_call_internal_unique_p (as_a  (g)))
+   return true;
+}


Do you have to scan the whole bb?  E.g. don't or should not those
unique IFNs force end of bb?


What about adding a flag to struct function?

  /* Nonzero if this function contains IFN_UNIQUE markers.  */
  unsigned int has_unique_calls : 1;

Then the tracer could either skip it, or do the search?

(I notice there are cilk flags already in struct function, instead of the above, 
we could add an openacc-specific one with  a similar behaviour?)


nathan



Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Thu, Oct 22, 2015 at 3:24 PM, Nathan Sidwell  wrote:
> On 10/22/15 09:17, Jakub Jelinek wrote:
>>
>> On Thu, Oct 22, 2015 at 09:08:30AM -0400, Nathan Sidwell wrote:
>
>
>> I agree with Richard that it would be better to write more about what kind
>> of IL changes are acceptable with IFN_UNIQUE in the IL and what are not.
>> E.g. is inlining ok (I'd hope yes)?  Is function splitting ok (bet as long
>> as all IFN_UNIQUE calls stay in one or the other part, but not both)?
>
>
> Essentially, yes.  a set of IFN_UNIQUE form a group  which must not be
> separated  from each other.  The set is discovered implicitly by following
> the CFG (though I suppose we could add an identifying INT_CST operand or
> something equivalent).

I don't see how this is achieved though.  To achieve this you'd need data
dependences between them, sth like

token_1 = IFN_UNIQUE (HEAD);
...
token_2 = IFN_UNIQUE (TAIL, token_1);

not sure if that is enough (what is "separate from each other"?), for example
partial inlining might simply pass token_1 to the split part where only
IFN_UNIQUE (TAIL, token_1) would be in.  At least the above provides
ordering between the two IFN calls (which you achieve by having VDEFs
I guess, but then they are also barriers for memory optimizations).

Richard.

> nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Richard Biener
On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:
> On 10/22/15 04:07, Richard Biener wrote:
>
>> Yeah, please make them either end or start a BB so we have to check
>> at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
>> it also makes it a code motion barrier.
>
>
> Just so I'm clear, you're not saying that RETURNS_TWICE will stop the call
> being duplicated though?

It will in practice.  RETURNS_TWICE will get you an abnormal edge from
entry (I think)

> thinking a little further, a code motion barrier is stronger than I need
> (but conservatively safe).  For instance:
>
> UNIQUE (HEAD)
> for (...)
> {
>   a = 
> }
> UNIQUE (TAIL)
>
> It would be safe and desirable to move that loop invariant to before the
> UNIQUE.  Perhaps it won't matter in practice -- after all having N physical
> threads calculate it in parallel (just after the HEAD marker, but before the
> loop) will probably take no longer than a single thread doing it while the
> others wait.[*]

RETURNS_TWICE will make the invariant motion stop at UNIQUE (HEAD),
but it would have done that anyway.  It will also be a CSE barrier, thus

tem = global;
UNIQUE(HEAD)
tem2 = global;

will not CSE tem2 to tem.

Richard.

> nathan
>
> [*] ut it will take more power.


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 10:30, Richard Biener wrote:

On Thu, Oct 22, 2015 at 3:24 PM, Nathan Sidwell  wrote:


Essentially, yes.  a set of IFN_UNIQUE form a group  which must not be
separated  from each other.  The set is discovered implicitly by following
the CFG (though I suppose we could add an identifying INT_CST operand or
something equivalent).


I don't see how this is achieved though.


Well, in practice it does.


 To achieve this you'd need data
dependences between them, sth like

token_1 = IFN_UNIQUE (HEAD);
...
token_2 = IFN_UNIQUE (TAIL, token_1);

not sure if that is enough (what is "separate from each other"?), for example
partial inlining might simply pass token_1 to the split part where only
IFN_UNIQUE (TAIL, token_1) would be in.


Yeah, such partial inlining will break.  Not encountered it happening though.


At least the above provides
ordering between the two IFN calls (which you achieve by having VDEFs
I guess, but then they are also barriers for memory optimizations).


Right, I think I'm relying on the compiler's lack of knowledge about what global 
state might be affected by the two calls to prevent it reordering them WRT 
eachother.  Is that what you meant?


(I did wonder about the need to add the kind of data dependency you describe, 
but found it unnecessary.)


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 10:26, Richard Biener wrote:

On Thu, Oct 22, 2015 at 4:01 PM, Nathan Sidwell  wrote:



RETURNS_TWICE will make the invariant motion stop at UNIQUE (HEAD),
but it would have done that anyway.  It will also be a CSE barrier, thus

tem = global;
UNIQUE(HEAD)
tem2 = global;

will not CSE tem2 to tem.


Yes, I can see it would behave like that for something globally visible.  What 
about state that isn't so visible?  (perhaps I'm worrying about something that 
doesn't matter, but I'd like to understand)


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Jakub Jelinek
On Thu, Oct 22, 2015 at 09:08:30AM -0400, Nathan Sidwell wrote:
> On 10/22/15 07:10, Julian Brown wrote:
> >On Thu, 22 Oct 2015 10:05:30 +0200
> >Richard Biener  wrote:
> 
> >>So you'd need to be more precise as to what properties you are trying
> >>to preserve by placing a single stmt somewhere.
> >
> >FWIW an earlier, abandoned attempt at solving the same problem was
> >discussed in the following thread, continuing through June:
> >
> >   https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02647.html
> >
> >Though the details of lowering of OpenACC constructs have changed with
> >Nathan's current patches, the underlying problem remains the same. PTX
> >requires certain operations (bar.sync) to be executed uniformly by all
> >threads in a CTA. IIUC this affects "JOIN" points across all
> >workers/vectors in a gang, in particular (though this is generic code,
> >other -- particularly GPU -- targets may have similar restrictions).
> 
> 
> Richard, does  this answer your question?

I agree with Richard that it would be better to write more about what kind
of IL changes are acceptable with IFN_UNIQUE in the IL and what are not.
E.g. is inlining ok (I'd hope yes)?  Is function splitting ok (bet as long
as all IFN_UNIQUE calls stay in one or the other part, but not both)?
Various loop optimization, ...

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 04:07, Richard Biener wrote:


Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.


Just so I'm clear, you're not saying that RETURNS_TWICE will stop the call being 
duplicated though?


thinking a little further, a code motion barrier is stronger than I need (but 
conservatively safe).  For instance:


UNIQUE (HEAD)
for (...)
{
  a = 
}
UNIQUE (TAIL)

It would be safe and desirable to move that loop invariant to before the UNIQUE. 
 Perhaps it won't matter in practice -- after all having N physical threads 
calculate it in parallel (just after the HEAD marker, but before the loop) will 
probably take no longer than a single thread doing it while the others wait.[*]


nathan

[*] ut it will take more power.


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 04:07, Richard Biener wrote:

On Thu, Oct 22, 2015 at 10:04 AM, Jakub Jelinek  wrote:



Do you have to scan the whole bb?  E.g. don't or should not those
unique IFNs force end of bb?


Yeah, please make them either end or start a BB so we have to check
at most a single stmt.  ECF_RETURNS_TWICE should achieve that,
it also makes it a code motion barrier.


Thanks, I'd not thought of doing it like that.  Will try.

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 07:10, Julian Brown wrote:

On Thu, 22 Oct 2015 10:05:30 +0200
Richard Biener  wrote:



So you'd need to be more precise as to what properties you are trying
to preserve by placing a single stmt somewhere.


FWIW an earlier, abandoned attempt at solving the same problem was
discussed in the following thread, continuing through June:

   https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02647.html

Though the details of lowering of OpenACC constructs have changed with
Nathan's current patches, the underlying problem remains the same. PTX
requires certain operations (bar.sync) to be executed uniformly by all
threads in a CTA. IIUC this affects "JOIN" points across all
workers/vectors in a gang, in particular (though this is generic code,
other -- particularly GPU -- targets may have similar restrictions).



Richard, does  this answer your question?

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-22 Thread Nathan Sidwell

On 10/22/15 09:17, Jakub Jelinek wrote:

On Thu, Oct 22, 2015 at 09:08:30AM -0400, Nathan Sidwell wrote:



I agree with Richard that it would be better to write more about what kind
of IL changes are acceptable with IFN_UNIQUE in the IL and what are not.
E.g. is inlining ok (I'd hope yes)?  Is function splitting ok (bet as long
as all IFN_UNIQUE calls stay in one or the other part, but not both)?


Essentially, yes.  a set of IFN_UNIQUE form a group  which must not be separated 
 from each other.  The set is discovered implicitly by following the CFG 
(though I suppose we could add an identifying INT_CST operand or something 
equivalent).


nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-21 Thread Nathan Sidwell
This patch implements a new internal function that has a 'uniqueness' property. 
  Jump-threading cannot clone it and tail-merging cannot combine multiple 
instances.


The uniqueness is implemented by a new gimple fn, gimple_call_internal_unique_p. 
 Routines that check for identical or cloneable calls are augmented to check 
this property.  These are:


* tree-ssa-threadedge, which is figuring out if jump threading is a win.  Jump 
threading is inhibited.


* gimple_call_same_target_p, used for tail merging and similar transforms.  Two 
calls of IFN_UNIQUE will never be  the same target.


* tracer.c, which is determining whether to clone a region.

Interestingly jump threading avoids cloning volatile asms (which it admits is 
conservatively safe), but the tracer does not. I wonder if there's a latent 
problem in tracer?


The reason I needed a function with this property is to  preserve the looping 
structure of a function's CFG.  As mentioned in the intro, we mark up loops 
(using this builtin), so the example I gave has the following inserts:


#pragma acc parallel ...
{
 // single mode here
#pragma acc loop ...
IFN_UNIQUE (FORKING  ...)
for (i = 0; i < N; i++) // loop 1
  ... // partitioned mode here
IFN_UNIQUE (JOINING ...)

if (expr) // single mode here
#pragma acc loop ...
  IFN_UNIQUE (FORKING ...)
  for (i = 0; i < N; i++) // loop 2
... // partitioned mode here
  IFN_UNIQUE (JOINING ...)
}

The properly nested loop property of the CFG is preserved through the 
compilation.  This is important as (a) it allows later passes to reconstruct 
this looping structure and (b) hardware constraints require a partioned region 
end for all partitioned threads at a single instruction.


Until I added this unique property, original bring-up  of partitioned execution 
would hit cases of split loops ending in multiple cloned JOINING markers and 
similar cases.


To distinguish different uses of the UNIQUE function, I use the first argument, 
which is expected to be an INTEGER_CST.  I figured this better than using 
multiple new internal fns, all with the unique property, as the latter would 
need (at least) a range check in gimple_call_internal_unique_p rather than a 
simple equality.


Jakub, IYR I originally had IFN_FORK and IFN_JOIN as such distinct internal fns. 
 This replaces that scheme.


ok?

nathan
2015-10-20  Nathan Sidwell  
	Cesar Philippidis  
	
	* internal-fn.c (expand_UNIQUE): New.
	* internal-fn.def (IFN_UNIQUE): New.
	(IFN_UNIQUE_UNSPEC): Define.
	* gimple.h (gimple_call_internal_unique_p): New.
	* gimple.c (gimple_call_same_target_p): Check internal fn
	uniqueness.
	* tracer.c (ignore_bb_p): Check for IFN_UNIQUE call.
	* tree-ssa-threadedge.c
	(record_temporary_equivalences_from_stmts): Likewise.

Index: gimple.c
===
--- gimple.c	(revision 229096)
+++ gimple.c	(working copy)
@@ -1346,7 +1346,8 @@ gimple_call_same_target_p (const gimple
 {
   if (gimple_call_internal_p (c1))
 return (gimple_call_internal_p (c2)
-	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2));
+	&& gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2)
+	&& !gimple_call_internal_unique_p (as_a  (c1)));
   else
 return (gimple_call_fn (c1) == gimple_call_fn (c2)
 	|| (gimple_call_fndecl (c1)
Index: gimple.h
===
--- gimple.h	(revision 229096)
+++ gimple.h	(working copy)
@@ -2895,6 +2895,14 @@ gimple_call_internal_fn (const gimple *g
   return gimple_call_internal_fn (gc);
 }
 
+/* Return true, if this internal gimple call is unique.  */
+
+static inline bool
+gimple_call_internal_unique_p (const gcall *gs)
+{
+  return gimple_call_internal_fn (gs) == IFN_UNIQUE;
+}
+
 /* If CTRL_ALTERING_P is true, mark GIMPLE_CALL S to be a stmt
that could alter control flow.  */
 
Index: internal-fn.c
===
--- internal-fn.c	(revision 229096)
+++ internal-fn.c	(working copy)
@@ -1958,6 +1958,30 @@ expand_VA_ARG (gcall *stmt ATTRIBUTE_UNU
   gcc_unreachable ();
 }
 
+/* Expand the IFN_UNIQUE function according to its first argument.  */
+
+static void
+expand_UNIQUE (gcall *stmt)
+{
+  rtx pattern = NULL_RTX;
+
+  switch (TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)))
+{
+default:
+  gcc_unreachable ();
+  break;
+
+case IFN_UNIQUE_UNSPEC:
+#ifdef HAVE_unique
+  pattern = gen_unique ();
+#endif
+  break;
+}
+
+  if (pattern)
+emit_insn (pattern);
+}
+
 /* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
 
Index: internal-fn.def
===
--- internal-fn.def	(revision 229096)
+++ internal-fn.def	(working copy)
@@ -65,3 +65,11 @@ DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST
 DEF_INTERNAL_FN (MUL_OVERFLOW,