Re: [PATCH, AARCH64] Enable fuse-caller-save for AARCH64

2014-06-19 Thread Tom de Vries

On 19-06-14 05:53, Richard Henderson wrote:

Do we in fact make sure this isn't an ifunc resolver?  I don't immediately see
how those get wired up in the cgraph...


Richard,

using the patch below I changed the 
gcc/testsuite/gcc.target/i386/fuse-caller-save.c testcase to use an ifunc 
resolver, and observed that the fuse-caller-save optimization didn't work.


The reason the optimization doesn't work in this case is that 
default_binds_local_p_1 checks the ifunc attribute:

...
  /* Weakrefs may not bind locally, even though the weakref itself is always
 static and therefore local.  Similarly, the resolver for ifunc functions
 might resolve to a non-local function.
 FIXME: We can resolve the weakref case more curefuly by looking at the
 weakref alias.  */
  else if (lookup_attribute ("weakref", DECL_ATTRIBUTES (exp))
	   || (TREE_CODE (exp) == FUNCTION_DECL
	       && lookup_attribute ("ifunc", DECL_ATTRIBUTES (exp))))
    local_p = false;
...

The default_binds_local_p_1 function is used via this path in the optimization:
get_call_reg_set_usage -> get_call_cgraph_rtl_info -> 
decl_binds_to_current_def_p -> default_binds_local_p -> default_binds_local_p_1 .


Thanks,
- Tom


diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
index 4ec4995..012dc12 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
@@ -5,11 +5,18 @@
 /* Testing -fuse-caller-save optimization option.  */
 
 static int __attribute__((noinline))
-bar (int x)
+my_bar (int x)
 {
   return x + 3;
 }
 
+static void (*resolve_bar (void)) (void)
+{
+  return (void*) my_bar;
+}
+
+static int __attribute__((noinline)) __attribute__((ifunc (resolve_bar))) bar (int x);
+
 int __attribute__((noinline))
 foo (int y)
 {
-- 
1.9.1



Re: [PATCH, AARCH64] Enable fuse-caller-save for AARCH64

2014-06-19 Thread Tom de Vries

On 19-06-14 05:21, Richard Henderson wrote:

On 06/01/2014 03:00 AM, Tom de Vries wrote:

+/* Emit call insn with PAT and do aarch64-specific handling.  */
+
+bool
+aarch64_emit_call_insn (rtx pat)
+{
+  rtx insn = emit_call_insn (pat);
+
+  rtx *fusage = CALL_INSN_FUNCTION_USAGE (insn);
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
+}
+


Which can't have been bootstrapped, since this has no return stmt.
Why the bool return type anyway?  Nothing appears to use it.



Richard,

Indeed, the return type should be void, this patch fixes that.

I have no setup to bootstrap this on aarch64. I've built an aarch64 compiler and 
ran the gcc.target/aarch64/fuse-caller-save.c testcase.


Committed as obvious.

Thanks,
- Tom
2014-06-19  Tom de Vries  t...@codesourcery.com

	* config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Change
	return type to void.
	* config/aarch64/aarch64.c (aarch64_emit_call_insn): Same.

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 213c8dc..53023ba 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -245,7 +245,7 @@ void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
 void aarch64_init_expanders (void);
 void aarch64_print_operand (FILE *, rtx, char);
 void aarch64_print_operand_address (FILE *, rtx);
-bool aarch64_emit_call_insn (rtx);
+void aarch64_emit_call_insn (rtx);
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b2d005b..f0aafbd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3395,7 +3395,7 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
 
 /* Emit call insn with PAT and do aarch64-specific handling.  */
 
-bool
+void
 aarch64_emit_call_insn (rtx pat)
 {
   rtx insn = emit_call_insn (pat);
-- 
1.9.1



Re: -fuse-caller-save - Collect register usage information

2014-06-19 Thread Tom de Vries

On 19-06-14 07:13, Richard Henderson wrote:

On 05/19/2014 07:30 AM, Tom de Vries wrote:

+  for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
+{
+  HARD_REG_SET insn_used_regs;
+
+  if (!NONDEBUG_INSN_P (insn))
+   continue;
+
+  find_all_hard_reg_sets (insn, insn_used_regs, false);
+
+  if (CALL_P (insn)
+  !get_call_reg_set_usage (insn, insn_used_regs, call_used_reg_set))
+   {
+ CLEAR_HARD_REG_SET (node-function_used_regs);
+ return;
+   }
+
+  IOR_HARD_REG_SET (node-function_used_regs, insn_used_regs);
+}

As an aside, wouldn't it work out better if we collect into a local variable
instead of writing to memory here in node-function_used_regs each time?


Richard,

Agreed. This patch implements that. I'll bootstrap and reg-test on x86_64 and 
commit as obvious.


Thanks,
- Tom


2014-06-19  Tom de Vries  t...@codesourcery.com

	* final.c (collect_fn_hard_reg_usage): Add and use variable
	function_used_regs.

diff --git a/gcc/final.c b/gcc/final.c
index 4f08073..e39930d 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -4760,13 +4760,13 @@ collect_fn_hard_reg_usage (void)
   int i;
 #endif
   struct cgraph_rtl_info *node;
+  HARD_REG_SET function_used_regs;
 
   /* ??? To be removed when all the ports have been fixed.  */
   if (!targetm.call_fusage_contains_non_callee_clobbers)
 return;
 
-  node = cgraph_rtl_info (current_function_decl);
-  gcc_assert (node != NULL);
+  CLEAR_HARD_REG_SET (function_used_regs);
 
   for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
 {
@@ -4779,25 +4779,26 @@ collect_fn_hard_reg_usage (void)
 
   if (CALL_P (insn)
 	   !get_call_reg_set_usage (insn, insn_used_regs, call_used_reg_set))
-	{
-	  CLEAR_HARD_REG_SET (node-function_used_regs);
-	  return;
-	}
+	return;
 
-  IOR_HARD_REG_SET (node-function_used_regs, insn_used_regs);
+  IOR_HARD_REG_SET (function_used_regs, insn_used_regs);
 }
 
   /* Be conservative - mark fixed and global registers as used.  */
-  IOR_HARD_REG_SET (node-function_used_regs, fixed_reg_set);
+  IOR_HARD_REG_SET (function_used_regs, fixed_reg_set);
 
 #ifdef STACK_REGS
   /* Handle STACK_REGS conservatively, since the df-framework does not
  provide accurate information for them.  */
 
   for (i = FIRST_STACK_REG; i = LAST_STACK_REG; i++)
-SET_HARD_REG_BIT (node-function_used_regs, i);
+SET_HARD_REG_BIT (function_used_regs, i);
 #endif
 
+  node = cgraph_rtl_info (current_function_decl);
+  gcc_assert (node != NULL);
+
+  COPY_HARD_REG_SET (node-function_used_regs, function_used_regs);
   node-function_used_regs_valid = 1;
 }
 
-- 
1.9.1



Re: -fuse-caller-save - Collect register usage information

2014-06-19 Thread Tom de Vries

On 19-06-14 07:13, Richard Henderson wrote:

On 05/19/2014 07:30 AM, Tom de Vries wrote:

+  for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
+{
+  HARD_REG_SET insn_used_regs;
+
+  if (!NONDEBUG_INSN_P (insn))
+   continue;
+
+  find_all_hard_reg_sets (insn, insn_used_regs, false);
+
+  if (CALL_P (insn)
+  !get_call_reg_set_usage (insn, insn_used_regs, call_used_reg_set))
+   {
+ CLEAR_HARD_REG_SET (node-function_used_regs);
+ return;
+   }
+
+  IOR_HARD_REG_SET (node-function_used_regs, insn_used_regs);
+}


SNIP


Let's suppose that we've got a rather large function, with only local calls for
which we can acquire usage.  Let's suppose that even one of those callees
further calls something else, such that insn_used_regs == call_used_reg_set.

We fill node-function_used_regs immediately, but keep scanning the rest of the
large function.



+
+  /* Be conservative - mark fixed and global registers as used.  */
+  IOR_HARD_REG_SET (node-function_used_regs, fixed_reg_set);
+  for (i = 0; i  FIRST_PSEUDO_REGISTER; i++)
+if (global_regs[i])
+  SET_HARD_REG_BIT (node-function_used_regs, i);
+
+#ifdef STACK_REGS
+  /* Handle STACK_REGS conservatively, since the df-framework does not
+ provide accurate information for them.  */
+
+  for (i = FIRST_STACK_REG; i = LAST_STACK_REG; i++)
+SET_HARD_REG_BIT (node-function_used_regs, i);
+#endif
+
+  node-function_used_regs_valid = 1;


Wouldn't it be better to compare the collected function_used_regs; if it
contains all of call_used_reg_set, decline to set function_used_regs_valid.
That way, we'll early exit from the above loop whenever we see that we can't
improve over the default call-clobber set.



Richard,

Agreed.  Attached patch implements this (on top of the minor rewrite of 
https://gcc.gnu.org/ml/gcc-patches/2014-06/msg01535.html ).



Although perhaps function_used_regs_valid is no longer the best name in that
case...



I think the name is still ok.  The field function_used_regs_valid just states 
that the function_used_regs field is valid and can be used.



OK for trunk if bootstrap and reg-test on x86_64 is ok ?

Thanks,
- Tom
2014-06-19  Tom de Vries  t...@codesourcery.com

	* final.c (collect_fn_hard_reg_usage): Don't save function_used_regs if
	it contains all call_used_regs.

diff --git a/gcc/final.c b/gcc/final.c
index e39930d..e67e84b 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -4795,6 +4795,11 @@ collect_fn_hard_reg_usage (void)
 SET_HARD_REG_BIT (function_used_regs, i);
 #endif
 
+  /* The information we have gathered is only interesting if it exposes a
+ register from the call_used_regs that is not used in this function.  */
+  if (hard_reg_set_subset_p (call_used_reg_set, function_used_regs))
+return;
+
   node = cgraph_rtl_info (current_function_decl);
   gcc_assert (node != NULL);
 
-- 
1.9.1



Fix finding reg-sets of call insn in collect_fn_hard_reg_usage

2014-06-19 Thread Tom de Vries

Richard,

At the moment, when processing a call in collect_fn_hard_reg_usage, we get the 
used regs from the callee, but forget to register the regs in the call insn 
itself (ouch).  This patch fixes this by introducing an extra IOR_HARD_REG_SET.


We also switch the order of find_all_hard_reg_sets and get_call_reg_set_usage. 
There's no point in doing find_all_hard_reg_sets on a call if 
get_call_reg_set_usage returns false.


OK for trunk if bootstrap and reg-test on x86_64 is ok ?

Thanks,
- Tom
2014-06-19  Tom de Vries  t...@codesourcery.com

	* final.c (collect_fn_hard_reg_usage): Add separate IOR_HARD_REG_SET for
	get_call_reg_set_usage.

diff --git a/gcc/final.c b/gcc/final.c
index e67e84b..bbeb50d 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -4775,12 +4775,16 @@ collect_fn_hard_reg_usage (void)
   if (!NONDEBUG_INSN_P (insn))
 	continue;
 
-  find_all_hard_reg_sets (insn, insn_used_regs, false);
+  if (CALL_P (insn))
+	{
+	  if (!get_call_reg_set_usage (insn, insn_used_regs,
+   call_used_reg_set))
+	return;
 
-  if (CALL_P (insn)
-	   !get_call_reg_set_usage (insn, insn_used_regs, call_used_reg_set))
-	return;
+	  IOR_HARD_REG_SET (function_used_regs, insn_used_regs);
+	}
 
+  find_all_hard_reg_sets (insn, insn_used_regs, false);
   IOR_HARD_REG_SET (function_used_regs, insn_used_regs);
 }
 
-- 
1.9.1



Re: [PATCH, ARM] Enable fuse-caller-save for ARM

2014-06-19 Thread Tom de Vries

On 19-06-14 05:59, Richard Henderson wrote:

On 06/01/2014 04:27 AM, Tom de Vries wrote:

+  if (TARGET_AAPCS_BASED)
+{
+  /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
+linker.  We need to add these to allow
+arm_call_fusage_contains_non_callee_clobbers to return true.  */
+  rtx *fusage = CALL_INSN_FUNCTION_USAGE (insn);
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
+  clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM));


Why are you adding CC_REGNUM if fixed registers are automatically included?



Richard,

You're right, setting a fixed register here is not required for fuse-caller-save 
to work safely.


But it fits the definition of the hook 
TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS:

...
Set to true if all the calls in the current function contain clobbers in 
CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call rather 
than by the callee, and are not already set or clobbered in the call pattern.

...

We can adapt the definition to not include fixed registers. I can make a patch 
for that, if you like.


Thanks,
- Tom


Re: [PATCH, AARCH64] Enable fuse-caller-save for AARCH64

2014-06-19 Thread Tom de Vries

On 19-06-14 05:53, Richard Henderson wrote:

On 06/01/2014 03:00 AM, Tom de Vries wrote:

+aarch64_emit_call_insn (rtx pat)
+{
+  rtx insn = emit_call_insn (pat);
+
+  rtx *fusage = CALL_INSN_FUNCTION_USAGE (insn);
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));

Actually, I'd like to know more about how this is supposed to work.

Why are you only marking the two registers that would be used by a PLT entry,
but not those clobbered by the ld.so trampoline, or indeed the unknown function
that would be called from the PLT.

Oh, I see, looking at the code we do actually follow the cgraph and make sure
it is a direct call with a known destination.  So, in fact, it's only the
registers that could be clobbered by ld branch islands (so these two are still
correct for aarch64).

This means the documentation is actually wrong when it mentions PLTs at all.


Yes, if we go from the point of view that the 
TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS hook's sole purpose is to enable 
the fuse-caller-save optimization.


How about this updated definition ? OK for trunk if re-testing on arm succeeds ?

Thanks,
- Tom


2014-06-19  Tom de Vries  t...@codesourcery.com

	* config/arm/arm.c (arm_emit_call_insn): Remove clobber of CC_REGNUM.
	* target.def: Update definition.
	* doc/tm.texi: Regenerate.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d293b5b..178f08b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -17642,11 +17642,11 @@ arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
   if (TARGET_AAPCS_BASED)
 {
   /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
-	 linker.  We need to add these to allow setting
-	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  */
+	 linker.  We need to add IP to allow setting
+	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  CC is not
+	 needed since it's a fixed register.  */
   rtx *fusage = CALL_INSN_FUNCTION_USAGE (insn);
   clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
-  clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM));
 }
 }
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c272630..b0a8dbd 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4884,14 +4884,11 @@ Whether this target supports splitting the stack when the options described in @
 @cindex miscellaneous register hooks
 
 @deftypevr {Target Hook} bool TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
-set to true if all the calls in the current function contain clobbers in
-CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call
-rather than by the callee, and are not already set or clobbered in the call
-pattern.  Examples of such registers are registers used in PLTs and stubs,
-and temporary registers used in the call instruction but not present in the
-rtl pattern.  Another way to formulate it is the registers not present in the
-rtl pattern that are clobbered by the call assuming the callee does not
-clobber any register.  The default version of this hook is set to false.
+Set to true if each call that binds to a local definition contain clobbers
+in CALL_INSN_FUNCTION_USAGE for the non-fixed registers that are clobbered by
+the call rather than by the callee, and are not already set or clobbered in
+the call pattern.  The default version of this hook is set to false.  The
+purpose of this hook it to enable the fuse-caller-save optimization.
 @end deftypevr
 
 @node Varargs
diff --git a/gcc/target.def b/gcc/target.def
index e455211..b738281 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5128,18 +5128,15 @@ FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, and the PIC_OFFSET_TABLE_REGNUM.,
  hook_void_bitmap)
 
 /* Targets should define this target hook to mark that non-callee clobbers are
-   present in CALL_INSN_FUNCTION_USAGE for all the calls in the current
-   function.  */
+   present in CALL_INSN_FUNCTION_USAGE for all the calls that bind to a local
+   definition.  */
 DEFHOOKPOD
 (call_fusage_contains_non_callee_clobbers,
- set to true if all the calls in the current function contain clobbers in\n\
-CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call\n\
-rather than by the callee, and are not already set or clobbered in the call\n\
-pattern.  Examples of such registers are registers used in PLTs and stubs,\n\
-and temporary registers used in the call instruction but not present in the\n\
-rtl pattern.  Another way to formulate it is the registers not present in the\n\
-rtl pattern that are clobbered by the call assuming the callee does not\n\
-clobber any register.  The default version of this hook is set to false.,
+ Set to true if each call that binds to a local definition contain clobbers\n\
+in CALL_INSN_FUNCTION_USAGE for the non-fixed registers that are clobbered by\n\
+the call rather than by the callee, and are not already set or clobbered in\n\
+the call pattern

Re: [PATCH, AARCH64] Enable fuse-caller-save for AARCH64

2014-06-20 Thread Tom de Vries

On 19-06-14 20:41, Richard Henderson wrote:

On 06/19/2014 11:25 AM, Tom de Vries wrote:

On 19-06-14 05:53, Richard Henderson wrote:

On 06/01/2014 03:00 AM, Tom de Vries wrote:

+aarch64_emit_call_insn (rtx pat)
+{
+  rtx insn = emit_call_insn (pat);
+
+  rtx *fusage = CALL_INSN_FUNCTION_USAGE (insn);
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
+  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));

Actually, I'd like to know more about how this is supposed to work.

Why are you only marking the two registers that would be used by a PLT entry,
but not those clobbered by the ld.so trampoline, or indeed the unknown function
that would be called from the PLT.

Oh, I see, looking at the code we do actually follow the cgraph and make sure
it is a direct call with a known destination.  So, in fact, it's only the
registers that could be clobbered by ld branch islands (so these two are still
correct for aarch64).

This means the documentation is actually wrong when it mentions PLTs at all.


Yes, if we go from the point of view that the
TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS hooks sole purpose is to enable
the fuse-caller-save optimization.

How about this updated definition ? OK for trunk if re-testing on arm succeeds ?


I did like the doc including mention of stubs, because they're easy to
forget.  How about

Set to true if each call that binds to a local definition explicitly clobbers
or sets all non-fixed registers modified by performing the call.  That is, by
the call pattern itself, or by code that might be inserted by the linker
(e.g. stubs, veneers, branch islands), but not including those modifiable by
the callee.  The affected registers may be mentioned explicitly in the
call pattern, or included as clobbers in CALL_INSN_FUNCTION_USAGE.
The default version of this hook is set to false.  The purpose of this hook
is to enable the fuse-caller-save optimization.




Looks good to me.  Bootstrapped and committed as attached.

Thanks,
- Tom

2014-06-20  Tom de Vries  t...@codesourcery.com

	* target.def (call_fusage_contains_non_callee_clobbers): Update
	definition.
	* doc/tm.texi: Regenerate.

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c272630..45281ae 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4884,14 +4884,14 @@ Whether this target supports splitting the stack when the options described in @
 @cindex miscellaneous register hooks
 
 @deftypevr {Target Hook} bool TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
-set to true if all the calls in the current function contain clobbers in
-CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call
-rather than by the callee, and are not already set or clobbered in the call
-pattern.  Examples of such registers are registers used in PLTs and stubs,
-and temporary registers used in the call instruction but not present in the
-rtl pattern.  Another way to formulate it is the registers not present in the
-rtl pattern that are clobbered by the call assuming the callee does not
-clobber any register.  The default version of this hook is set to false.
+Set to true if each call that binds to a local definition explicitly
+clobbers or sets all non-fixed registers modified by performing the call.
+That is, by the call pattern itself, or by code that might be inserted by the
+linker (e.g. stubs, veneers, branch islands), but not including those
+modifiable by the callee.  The affected registers may be mentioned explicitly
+in the call pattern, or included as clobbers in CALL_INSN_FUNCTION_USAGE.
+The default version of this hook is set to false.  The purpose of this hook
+is to enable the fuse-caller-save optimization.
 @end deftypevr
 
 @node Varargs
diff --git a/gcc/target.def b/gcc/target.def
index e455211..ee250e6 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5128,18 +5128,18 @@ FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, and the PIC_OFFSET_TABLE_REGNUM.,
  hook_void_bitmap)
 
 /* Targets should define this target hook to mark that non-callee clobbers are
-   present in CALL_INSN_FUNCTION_USAGE for all the calls in the current
-   function.  */
+   present in CALL_INSN_FUNCTION_USAGE for all the calls that bind to a local
+   definition.  */
 DEFHOOKPOD
 (call_fusage_contains_non_callee_clobbers,
- set to true if all the calls in the current function contain clobbers in\n\
-CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call\n\
-rather than by the callee, and are not already set or clobbered in the call\n\
-pattern.  Examples of such registers are registers used in PLTs and stubs,\n\
-and temporary registers used in the call instruction but not present in the\n\
-rtl pattern.  Another way to formulate it is the registers not present in the\n\
-rtl pattern that are clobbered by the call assuming the callee does not\n\
-clobber any register.  The default version of this hook is set to false.,
+ Set to true if each call that binds to a local definition explicitly\n

Re: Fix finding reg-sets of call insn in collect_fn_hard_reg_usage

2014-06-26 Thread Tom de Vries

On 19-06-14 18:47, Richard Henderson wrote:

And I forgot to mention it might be worth while to notice simple recursion.
Avoid the early exit path if caller == callee, despite the caller-save info not
being valid.




Richard,

attached patch enables handling of self-recursive functions in the 
fuse-caller-save optimization, and adds a test-case.


I've done an x86_64 build and ran the i386.exp testsuite.

OK for trunk if full bootstrap and reg-test succeeds?

Thanks,
- Tom

2014-06-26  Tom de Vries  t...@codesourcery.com

	* final.c (get_call_fndecl): Declare.
	(self_recursive_call_p): New function.
	(collect_fn_hard_reg_usage): Handle self-recursive function calls.

	* gcc.target/i386/fuse-caller-save-rec.c: New test.

diff --git a/gcc/final.c b/gcc/final.c
index 9525efc..ed0ba0b 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -225,6 +225,7 @@ static int final_addr_vec_align (rtx);
 #endif
 static int align_fuzz (rtx, rtx, int, unsigned);
 static void collect_fn_hard_reg_usage (void);
+static tree get_call_fndecl (rtx);
 
 /* Initialize data in final at the beginning of a compilation.  */
 
@@ -4750,6 +4751,16 @@ make_pass_clean_state (gcc::context *ctxt)
   return new pass_clean_state (ctxt);
 }
 
+/* Return true if INSN is a call to the the current function.  */
+
+static bool
+self_recursive_call_p (rtx insn)
+{
+  tree fndecl = get_call_fndecl (insn);
+  return (fndecl == current_function_decl
+	   decl_binds_to_current_def_p (fndecl));
+}
+
 /* Collect hard register usage for the current function.  */
 
 static void
@@ -4775,7 +4786,8 @@ collect_fn_hard_reg_usage (void)
   if (!NONDEBUG_INSN_P (insn))
 	continue;
 
-  if (CALL_P (insn))
+  if (CALL_P (insn)
+	   !self_recursive_call_p (insn))
 	{
 	  if (!get_call_reg_set_usage (insn, insn_used_regs,
    call_used_reg_set))
diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
new file mode 100644
index 000..b30a0b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fuse-caller-save -fomit-frame-pointer -fno-optimize-sibling-calls } */
+/* { dg-additional-options -mregparm=1 { target ia32 } } */
+
+/* Test -fuse-caller-save optimization on self-recursive function.  */
+
+static int __attribute__((noinline))
+bar (int x)
+{
+  if (x  4)
+return bar (x - 3);
+  return 0;
+}
+
+int __attribute__((noinline))
+foo (int y)
+{
+  return y + bar (y);
+}
+
+int
+main (void)
+{
+  return !(foo (5) == 13);
+}
+
+/* Verify that no registers where saved on stack.  */
+/* { dg-final { scan-assembler-not \.cfi_offset  } } */
+
+/* Verify that bar is self-recursive.  */
+/* { dg-final { scan-assembler-times call\tbar 2 } } */
+
-- 
1.9.1



Re: Fix finding reg-sets of call insn in collect_fn_hard_reg_usage

2014-06-27 Thread Tom de Vries

On 19-06-14 18:40, Richard Henderson wrote:

On 06/19/2014 09:07 AM, Tom de Vries wrote:


2014-06-19  Tom de Vries  t...@codesourcery.com

* final.c (collect_fn_hard_reg_usage): Add separate IOR_HARD_REG_SET for
get_call_reg_set_usage.


Ok, as far as it goes, but...

It seems like there should be quite a bit of overlap with regs_ever_live here.
  How much of that previous computation can we leverage?

It appears that regs_ever_live includes any register mentioned explicitly, and
thus the only registers it doesn't contain are those killed by the callees.
That should be an easier scan than the rtl, since we have those already
collected in the cgraph.

Sorry I wasn't paying much attention earlier when this was first posted, when
questions like this may have been answered.



Richard,

At the moment, collect_fn_hard_reg_usage is run in pass_final, after final (), 
that is, after the final splitting of insns. The idea is that we use the most 
final representation available, to be on the safe side.


AFAIU, the regs_ever_live information is computed using the df infrastructure, 
which requires the cfg, which is available only until pass_free_cfg for all 
targets (more details in this discussion: 
https://gcc.gnu.org/ml/gcc-patches/2013-05/msg01060.html ). I don't think 
regs_ever_live is guaranteed to be up to date afterwards.


So in order to know whether it's safe and optimal to use regs_ever_live 
instead, the question is whether the passes after pass_free_cfg (are allowed to) 
add or remove sets or clobbers of call_really_used_regs. I don't know the full 
answer there.


Eric, can you comment?

Thanks,
- Tom



[PATCH] Don't allow combination of read/write and earlyclobber constraint modifier

2014-07-02 Thread Tom de Vries

On 01-07-14 21:47, Jeff Law wrote:

On 07/01/14 13:27, Tom de Vries wrote:

So my question is: is the combination of '&' and '+' supported ? If so,
what is the exact semantics ? If not, should we warn or give an error ?



I don't think we can define any reasonable semantics for &+.  My recommendation
would be for this to be considered a hard error.



[ move discussion from gcc ml to gcc-patches ml ]

Attached patch detects the combination of + and & constraints during genrecog, 
and generates an error like this:

...
/home/vries/gcc_versions/devel/src/gcc/config/aarch64/aarch64-simd.md:1020: 
operand 0 has in-out reload, incompatible with earlyclobber
/home/vries/gcc_versions/devel/src/gcc/config/aarch64/aarch64-simd.md:1020: 
operand 0 has in-out reload, incompatible with earlyclobber
/home/vries/gcc_versions/devel/src/gcc/config/aarch64/aarch64-simd.md:1020: 
operand 0 has in-out reload, incompatible with earlyclobber

make[2]: *** [s-recog] Error 1
...
The error triggers three times, once for each mode iterator element.

OK if x86_64 bootstrap succeeds ?

Thanks,
- Tom
2014-07-02  Tom de Vries  t...@codesourcery.com

	* genrecog.c (validate_pattern): Don't allow earlyclobber constraint
	modifier with read/write constraint modifier.

diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index 457b59c..ad709ee 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -481,6 +481,13 @@ validate_pattern (rtx pattern, rtx insn, rtx set, int set_code)
    rtx_name[GET_CODE (insn)]);
 	  }
 
+	if (constraints0 == '+'
+		 strchr (XSTR (pattern, 2), '') != NULL)
+	  error_with_line (pattern_lineno,
+			   operand %d has in-out reload, incompatible with
+			earlyclobber,
+			   XINT (pattern, 0));
+
 	/* A MATCH_OPERAND that is a SET should have an output reload.  */
 	else if (set  constraints0)
 	  {
-- 
1.9.1



Re: combination of read/write and earlyclobber constraint modifier

2014-07-02 Thread Tom de Vries

On 02-07-14 08:23, Marc Glisse wrote:

In the first example you gave, looking at the pattern (no match_dup, setting the
full register), it seems that it may have wanted = instead of +.


[ move discussion from gcc ml to gcc-patches ml ]

Marcus,

The + constraint on operand 0 of vec_pack_trunc_<mode> seems wrong, since the 
template does not use the operand as input.


This patch fixes that.

OK for trunk if aarch64 build & regtest succeeds ?

Thanks,
- Tom


2014-07-02  Tom de Vries  t...@codesourcery.com

	* config/aarch64/aarch64-simd.md
	(define_insn vec_unpack_trunc_mode): Fix constraint.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1c32f0c..0377de4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1018,7 +1018,7 @@
 ;; For quads.
 
 (define_insn vec_pack_trunc_mode
- [(set (match_operand:VNARROWQ2 0 register_operand +w)
+ [(set (match_operand:VNARROWQ2 0 register_operand =w)
(vec_concat:VNARROWQ2
 	 (truncate:VNARROWQ (match_operand:VQN 1 register_operand w))
 	 (truncate:VNARROWQ (match_operand:VQN 2 register_operand w]
-- 
1.9.1



Re: combination of read/write and earlyclobber constraint modifier

2014-07-03 Thread Tom de Vries

On 03-07-14 10:20, Marcus Shawcroft wrote:

On 2 July 2014 09:02, Tom de Vries tom_devr...@mentor.com wrote:

On 02-07-14 08:23, Marc Glisse wrote:


In the first example you gave, looking at the pattern (no match_dup,
setting the
full register), it seems that it may have wanted = instead of +.



[ move discussion from gcc ml to gcc-patches ml ]

Marcus,

The + constraint on operand 0 of vec_unpack_trunc_mode seems wrong, since
the template does not use the operand as input.

This patch fixes that.

OK for trunk if aarch64 build & regtest succeeds ?


Your patch looks fine, operand 0 isn't used for input.  OK assuming no
regression.   Did you find this by inspection or is this the cause of
some bug?



Marcus,

I found this by inspection: https://gcc.gnu.org/ml/gcc/2014-07/msg00007.html .

Thanks,
- Tom



[PATCH, COMMITTED] Update earlyclobber documentation

2014-07-04 Thread Tom de Vries

[ was: Re: combination of read/write and earlyclobber constraint modifier on 
gcc@ ]
On 03-07-14 00:42, Jeff Law wrote:

Based on various followups (public & private), let's go with the clarification
above.  Richard E. explicitly added support for this in the mid/late 90s as an
optimization for the ARM.


Jeff,

Committed as attached.

Thanks to all for the helpful comments.

- Tom
2014-07-04  Tom de Vries  t...@codesourcery.com

	* doc/md.texi (@subsection Constraint Modifier Characters): Clarify
	combination of earlyclobber and read/write modifiers.

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 539865e..fde67d7 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -1582,7 +1582,10 @@ alternatives of this form often allows GCC to produce better code
 when only some of the inputs can be affected by the earlyclobber.
 See, for example, the @samp{mulsi3} insn of the ARM@.
 
-@samp{&} does not obviate the need to write @samp{=}.
+Furthermore, if the @dfn{earlyclobber} operand is also a read/write operand, then
+that operand is modified only after it's used.
+
+@samp{&} does not obviate the need to write @samp{=} or @samp{+}.
 
 @cindex @samp{%} in constraint
 @item %
-- 
1.9.1



Re: [PATCH, AArch64, Testsuite] Specify -fno-use-caller-save for func-ret* tests

2014-07-08 Thread Tom de Vries

On 01-07-14 19:26, Jeff Law wrote:

On 07/01/14 09:51, Yufeng Zhang wrote:

Hi,

This patch resolves a conflict between the aapcs64 test framework for
func-ret tests and the optimization option -fuse-caller-save, which was
enabled by default at -O1 or above recently.



Minor detail: it's enabled by default at -O2 or above:
...
{ OPT_LEVELS_2_PLUS, OPT_fuse_caller_save, NULL, 1 },
...


Basically, the test framework has an inline-assembly based mechanism in
place which invokes the test facility function right on the return of a
tested function.

  The compiler with -fuse-caller-save is unable to
 identify the unconventional call graph and carries out the optimization
 regardless.

AFAIU, we're overwriting the return register to implement a function call at 
return in order to see the exact state of registers at return:

...
__attribute__ ((noinline)) unsigned char
func_return_val_0 (int i, double d, unsigned char t)
{
  asm ("" :: "r" (i), "r" (d));
  asm volatile ("mov %0, x30" : "=r" (saved_return_address));
  asm volatile ("mov x30, %0" : : "r" ((unsigned long long) myfunc));
  return t;
}
...

But we're not informing the compiler that a hidden function call takes place. 
This patch fixes that, and there's no need to disable fuse-caller-save.


Tested with aarch64 build.  OK for trunk?

Thanks,
- Tom

2014-07-08  Tom de Vries  t...@codesourcery.com

	* gcc.target/aarch64/aapcs64/aapcs64.exp
	(additional_flags_for_func_ret): Remove.
	(func-ret-*.c): Use additional_flags.
	* gcc.target/aarch64/aapcs64/abitest-2.h (FUNC_VAL_CHECK): Add missing
	call_used_regs clobbers.

Index: gcc/testsuite/gcc.target/aarch64/aapcs64/aapcs64.exp
===
--- gcc/testsuite/gcc.target/aarch64/aapcs64/aapcs64.exp (revision 212294)
+++ gcc/testsuite/gcc.target/aarch64/aapcs64/aapcs64.exp (working copy)
@@ -48,15 +48,11 @@ foreach src [lsort [glob -nocomplain $sr
 }
 
 # Test function return value.
-#   Disable -fuse-caller-save to prevent the compiler from generating
-#   conflicting code.
-set additional_flags_for_func_ret $additional_flags
-append additional_flags_for_func_ret  -fno-use-caller-save
 foreach src [lsort [glob -nocomplain $srcdir/$subdir/func-ret-*.c]] {
 if {[runtest_file_p $runtests $src]} {
 	c-torture-execute [list $src \
 $srcdir/$subdir/abitest.S] \
-$additional_flags_for_func_ret
+$additional_flags
 }
 }
 
Index: gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-2.h
===
--- gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-2.h (revision 212294)
+++ gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-2.h (working copy)
@@ -80,10 +80,18 @@ __attribute__ ((noinline)) type FUNC_NAM
The previous approach of sequentially calling myfunc right after	  \
this function does not guarantee myfunc see the exact register	  \
content, as compiler may emit code in between the two calls,	  \
-   especially during the -O0 codegen.  */  \
+   especially during the -O0 codegen.  \
+   However, since we're doing a call, we need to clobber all call	  \
+   used regs.  */			  \
 asm volatile (mov %0, x30 : =r (saved_return_address));		  \
-asm volatile (mov x30, %0 : : r ((unsigned long long) myfunc));   \
-return t;  \
+asm volatile (mov x30, %0 : : r ((unsigned long long) myfunc) :	  \
+		  x0, x1, x2, x3, x4, x5, x6, x7,	  \
+		  x8,	 x9,	x10, x11, x12, x13, x14, x15, \
+		  x16, x17, x18,	  \
+		  v0,	 v1,	v2,  v3,  v4,  v5,  v6,  v7,  \
+		  v16, v17, v18, v19, v20, v21, v22, v23, \
+		  v24, v25, v26, v27, v28, v29, v30, v31);\
+return t;\
   }
 #include TESTFILE
 


Simplify gcc.target/mips/fuse-caller-save*.c

2014-07-09 Thread Tom de Vries

Richard,

during testing the gcc.target/mips/fuse-caller-save*.c test-cases with more 
combinations of -march, -mabi, -fpic etc, I found that the checks for amount of 
stores are rather fragile, so I removed them in this patch.


The remaining checks check for absence of $16. To address the risk that $16 is 
absent for another reason than -fuse-caller-save, I've added 3 corresponding 
test-cases that check for presence of $16 with -fno-use-caller-save.


The fuse-caller-save tests now pass for all combinations I've tried. OK for 
trunk?

Thanks,
- Tom
2014-07-09  Tom de Vries  t...@codesourcery.com

	* gcc.target/mips/fuse-caller-save.c: Remove test on number of stores.
	* gcc.target/mips/fuse-caller-save-mips16.c: Same.
	* gcc.target/mips/fuse-caller-save-micromips.c: Same.
	* gcc.target/mips/fuse-caller-save-neg.c: New test.
	* gcc.target/mips/fuse-caller-save-mips16-neg.c: Same.
	* gcc.target/mips/fuse-caller-save-micromips-neg.c: Same.

diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips-neg.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips-neg.c
new file mode 100644
index 000..9d43be9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips-neg.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options -fno-use-caller-save (-mmicromips) } */
+/* At -O0 and -O1, the register allocator behaves more conservatively, and
+   the fuse-caller-save optimization doesn't trigger.  */
+/* { dg-skip-if  { *-*-* }  { -O0 -O1 } } */
+/* Testing -fuse-caller-save optimization option.  */
+
+#define ATTRIBUTE MICROMIPS
+#include fuse-caller-save.h
+
+/* Check that the first caller-save register is used.  This is to make sure that
+   test fuse-caller-save-micromips tests something meaningful.  */
+/* { dg-final { scan-assembler \\\$16 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
index 6ad01c7..bb70890 100644
--- a/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
@@ -8,10 +8,5 @@
 #define ATTRIBUTE MICROMIPS
 #include fuse-caller-save.h
 
-/* Check that there are only 2 stack-saves: r31 in main and foo.  */
-
-/* Check that there only 2 sw/sd.  */
-/* { dg-final { scan-assembler-times (?n)s\[wd\]\t\\\$.*,.*\\(\\\$sp\\) 2 } } */
-
 /* Check that the first caller-save register is unused.  */
 /* { dg-final { scan-assembler-not \\\$16 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16-neg.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16-neg.c
new file mode 100644
index 000..cb6360b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16-neg.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options -fno-use-caller-save (-mips16) } */
+/* At -O0 and -O1, the register allocator behaves more conservatively, and
+   the fuse-caller-save optimization doesn't trigger.  */
+/* { dg-skip-if  { *-*-* }  { -O0 -O1 } } */
+/* Testing -fuse-caller-save optimization option.  */
+
+#define ATTRIBUTE MIPS16
+#include fuse-caller-save.h
+
+/* Check that the first caller-save register is used.  This is to make sure that
+   test fuse-caller-save-mips16 tests something meaningful.  */
+/* { dg-final { scan-assembler \\\$16 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
index a7c6cf4..62b33a7 100644
--- a/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
@@ -8,10 +8,5 @@
 #define ATTRIBUTE MIPS16
 #include fuse-caller-save.h
 
-/* Check that there are only 2 stack-saves: r31 in main and foo.  */
-
-/* Check that there only 2 sw/sd.  */
-/* { dg-final { scan-assembler-times (?n)s\[wd\]\t\\\$.*,.*\\(\\\$sp\\) 2 } } */
-
 /* Check that the first caller-save register is unused.  */
 /* { dg-final { scan-assembler-not \\\$16 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-neg.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-neg.c
new file mode 100644
index 000..3de0168
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-neg.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options -fno-use-caller-save } */
+/* At -O0 and -O1, the register allocator behaves more conservatively, and
+   the fuse-caller-save optimization doesn't trigger.  */
+/* { dg-skip-if  { *-*-* }  { -O0 -O1 } } */
+/* Testing -fuse-caller-save optimization option.  */
+
+#define ATTRIBUTE NOCOMPRESSION
+#include fuse-caller-save.h
+
+/* Check that the first caller-save register is used.  This is to make sure that
+   test fuse-caller-save tests something meaningful.  */
+/* { dg-final { scan-assembler \\\$16 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
index 72c08fe..a0267f0 100644
--- a/gcc/testsuite/gcc.target/mips/fuse

Re: Simplify gcc.target/mips/fuse-caller-save*.c

2014-07-09 Thread Tom de Vries

On 09-07-14 20:32, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

Richard,

during testing the gcc.target/mips/fuse-caller-save*.c test-cases with more
combinations of -march, -mabi, -fpic etc, I found that the checks for amount of
stores are rather fragile, so I removed them in this patch.


Which combinations specifically?  I don't see offhand why -march would
make a difference,


--target_board=unix/-march=mips32:
...
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O3 -fomit-frame-pointer 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O3 -g   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -Os   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2 -flto -flto-partition=none 
  scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2 -flto 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2

...

We're using save instead of sw.


or why -mabi would make a difference


--target_board=unix/-mabi=64:
...
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -O2   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -O3 -fomit-frame-pointer 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -O3 -g 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -Os   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -O2 -flto 
-flto-partition=none   scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-micromips.c   -O2 -flto 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -O2   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -O3 -fomit-frame-pointer 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -O3 -g   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -Os   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -O2 -flto -flto-partition=none 
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save.c   -O2 -flto   scan-assembler-times 
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2

...

 for -mno-abicalls.

With --target_board=unix/-mabi=64/-mno-abicalls, indeed those tests pass.


-mabicalls might change the output because of things like the $gp save slot,
but if that's the only reason it might be better to add addressing=absolute
to the tests instead.


OK, attached patch implements that, that fixes the -mabi=64 problem.

What do we do about the 'save' instead of 'sw' ?

Thanks,
- Tom



Thanks,
Richard



2014-07-09  Tom de Vries  t...@codesourcery.com

	* gcc.target/mips/fuse-caller-save.c: Add addressing=absolute to
	dg-options.
	* gcc.target/mips/fuse-caller-save-mips16.c: Same.
	* gcc.target/mips/fuse-caller-save-micromips.c: Same.

diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
index 6ad01c7..70349dc 100644
--- a/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -fuse-caller-save (-mmicromips) } */
+/* { dg-options -fuse-caller-save (-mmicromips) addressing=absolute } */
 /* At -O0 and -O1, the register allocator behaves more conservatively, and
the fuse-caller-save optimization doesnt' trigger.  */
 /* { dg-skip-if  { *-*-* }  { -O0 -O1 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
index a7c6cf4..330d325 100644
--- a/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save-mips16.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -fuse-caller-save (-mips16) } */
+/* { dg-options -fuse-caller-save (-mips16) addressing=absolute } */
 /* At -O0 and -O1, the register allocator behaves more conservatively, and
the fuse-caller-save optimization doesnt' trigger.  */
 /* { dg-skip-if  { *-*-* }  { -O0 -O1 } } */
diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save.c b/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
index 72c08fe..370b1ee 100644
--- a/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -fuse-caller-save } */
+/* { dg-options -fuse-caller

Re: Simplify gcc.target/mips/fuse-caller-save*.c

2014-07-10 Thread Tom de Vries

On 09-07-14 23:06, Tom de Vries wrote:

On 09-07-14 20:32, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

Richard,

during testing the gcc.target/mips/fuse-caller-save*.c test-cases with more
combinations of -march, -mabi, -fpic etc, I found that the checks for amount of
stores are rather fragile, so I removed them in this patch.


Which combinations specifically?  I don't see offhand why -march would
make a difference,


--target_board=unix/-march=mips32:
...
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2   scan-assembler-times
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O3 -fomit-frame-pointer
scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O3 -g   scan-assembler-times
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -Os   scan-assembler-times
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2 -flto -flto-partition=none
   scan-assembler-times (?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
FAIL: gcc.target/mips/fuse-caller-save-mips16.c   -O2 -flto scan-assembler-times
(?n)s[wd]\t\\$.*,.*\\(\\$sp\\) 2
...

We're using save instead of sw.


SNIP


What do we do about the 'save' instead of 'sw' ?



The mips16e save/restore enabling is controlled by this code in mips.h:
...
/* Generate mips16e code. Default 16bit ASE for mips32* and mips64* */
#define GENERATE_MIPS16E	(TARGET_MIPS16 && mips_isa >= 32)
/* Generate mips16e register save/restore sequences.  */
#define GENERATE_MIPS16E_SAVE_RESTORE (GENERATE_MIPS16E && mips_abi == ABI_32)
...

Adding isa_rev=0 in dg-options works. Is that the way to fix it?

Thanks,
- Tom



Re: -fuse-caller-save - Collect register usage information

2014-07-13 Thread Tom de Vries

On 19-06-14 21:45, Richard Henderson wrote:

On 06/19/2014 12:36 PM, Jan Hubicka wrote:

On 06/19/2014 09:06 AM, Tom de Vries wrote:


2014-06-19  Tom de Vries  t...@codesourcery.com

* final.c (collect_fn_hard_reg_usage): Don't save function_used_regs if
it contains all call_used_regs.


Ok.


When we now have way to represent different reg usages for functions, what 
would be best
way to make local functions to default into saving some SSE registers on 
x86/x86-64?


I wouldn't do that at all.  Leave all sse registers call-clobbered.  This way
you don't need to have different entry points (or one possibly less efficient
entry point) when a function is used both locally and globally.

What I would investigate is how to use this hard reg usage data in the register
allocator.  If we know that the callee only uses xmm0-xmm4, then we can keep
xmm5-xmm15 live across the call.



AFAIU, what you describe here already works. This patch contains a version of 
the fuse-caller-save test with xmm registers. The callee bar only uses xmm0, and 
caller foo keeps xmm1 live across the call.


OK for trunk?

Thanks,
- Tom

2014-07-13  Tom de Vries  t...@codesourcery.com

	* gcc.target/i386/fuse-caller-save-xmm-run.c: New test.
	* gcc.target/i386/fuse-caller-save-xmm.c: New test.

diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm-run.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm-run.c
new file mode 100644
index 000..17385fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm-run.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options -O2 -msse -fuse-caller-save } */
+
+typedef double v2df __attribute__((vector_size (16)));
+
+static v2df __attribute__((noinline))
+bar (v2df a)
+{ 
+  return a + (v2df){ 3.0, 3.0 };
+}
+
+v2df __attribute__((noinline))
+foo (v2df y)
+{
+  return y + bar (y);
+}
+
+int
+main (void)
+{
+  int success;
+  union {
+v2df v;
+double d[2];
+  } u;
+
+  u.v = foo ((v2df){ 5.0, 5.0});
+  success = (u.d[0] == 13.0
+	     && u.d[1] == 13.0);
+
+  return !success;
+}
+
+
diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
new file mode 100644
index 000..de1ca63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -msse -fuse-caller-save } */
+
+typedef double v2df __attribute__((vector_size (16)));
+
+static v2df __attribute__((noinline))
+bar (v2df a)
+{ 
+  return a + (v2df){ 3.0, 3.0 };
+}
+
+v2df __attribute__((noinline))
+foo (v2df y)
+{
+  return y + bar (y);
+}
+
+int
+main (void)
+{
+  int success;
+  union {
+v2df v;
+double d[2];
+  } u;
+
+  u.v = foo ((v2df){ 5.0, 5.0});
+  success = (u.d[0] == 13.0
+	     && u.d[1] == 13.0);
+
+  return !success;
+}
+
+/* { dg-final { scan-assembler-not movaps\t%xmm1, \\(%rsp\\) } } */
+/* { dg-final { scan-assembler-not movapd\t\\(%rsp\\), %xmm1 } } */
+/* { dg-final { scan-assembler-times .cfi_def_cfa_offset 16 1 } } */
+/* { dg-final { scan-assembler-times .cfi_def_cfa_offset 32 1 } } */
+


[PATCH, i386, PR61827] Fix fuse-caller-save-xmm.c test-case

2014-07-20 Thread Tom de Vries

Uros,

this patch fixes the problems in test-case 
gcc.target/i386/fuse-caller-save-xmm.c reported in PR 61827. I've removed the 
checks for cfi_def_cfa_offset, which were not robust enough for the different 
configurations.


Furthermore, I've:
- added checks for all insns that handle the xmm registers, to make sure we're
  actually using the xmm1 register.
- fixed the scan-assembler-not lines to allow both %esp and %rsp.
- removed main, which was really only intended for the
  fuse-caller-save-xmm-run.c test-case.

Tested with -m32 and -m64.

OK for trunk?

Thanks,
- Tom
2014-07-20  Tom de Vries  t...@codesourcery.com

	PR target/61827
	* gcc.target/i386/fuse-caller-save-xmm.c: Add checks for insns with xmm
	registers.  Remove cfi_def_cfa_offset checks.  Generalize checks
	containing %rsp.
	(main): Remove.

diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
index ff21f0c..3754b01 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
@@ -15,23 +15,12 @@ foo (v2df y)
   return y + bar (y);
 }
 
-int
-main (void)
-{
-  int success;
-  union {
-v2df v;
-double d[2];
-  } u;
-
-  u.v = foo ((v2df){ 5.0, 5.0});
-  success = (u.d[0] == 13.0
-	     && u.d[1] == 13.0);
-
-  return !success;
-}
+/* Check presence of all insns on xmm registers.  These checks are expected to
+   pass with both -fuse-caller-save and -fno-use-caller-save.  */
+/* { dg-final { scan-assembler-times addpd\t\\.LC0.*, %xmm0 1 } } */
+/* { dg-final { scan-assembler-times addpd\t%xmm1, %xmm0 1 } } */
+/* { dg-final { scan-assembler-times movapd\t%xmm0, %xmm1 1 } } */
 
-/* { dg-final { scan-assembler-not movaps\t%xmm1, \\(%rsp\\) } } */
-/* { dg-final { scan-assembler-not movapd\t\\(%rsp\\), %xmm1 } } */
-/* { dg-final { scan-assembler-times .cfi_def_cfa_offset 16 1 } } */
-/* { dg-final { scan-assembler-times .cfi_def_cfa_offset 32 1 } } */
+/* Check absence of save/restore of xmm1 register.  */
+/* { dg-final { scan-assembler-not movaps\t%xmm1, \\(%.sp\\) } } */
+/* { dg-final { scan-assembler-not movapd\t\\(%.sp\\), %xmm1 } } */


Re: [PATCH, i386, PR61827] Fix fuse-caller-save-xmm.c test-case

2014-07-21 Thread Tom de Vries

On 21-07-14 09:31, Uros Bizjak wrote:

On Sun, Jul 20, 2014 at 12:25 PM, Tom de Vries tom_devr...@mentor.com wrote:


this patch fixes the problems in test-case
gcc.target/i386/fuse-caller-save-xmm.c reported in PR 61827. I've removed
the checks for cfi_def_cfa_offset, which were not robust enough for the
different configurations.

Furthermore, I've:
- added checks for all insns that handle the xmm registers, to make sure
we're
   actually using the xmm1 register.
- fixed the scan-assembler-not lines to allow both %esp and %rsp.


You can use %\[re\]?sp here. We know that only r and e are valid.


- removed main, which was really only intended for the
   fuse-caller-save-xmm-run.c test-case.

Tested with -m32 and -m64.


Probably you should also add -fomit-frame-pointer, otherwise the test
(that checks for SP based address) will fail on Darwin and Solaris
that default to frame pointers.


OK for trunk?


OK with the above changes.



Uros,

Dominique noticed that the .LC0 check failed on darwin, since the label LC0 is 
used. This follow-up patch fixes that (and I see now you already Ok-ed this change).


Furthermore, I've realized from the comments in the PR that for solaris/sun-as 
and darwin no cfi directives are generated. There are two other i386 
fuse-caller-save tests which test cfi directives. The reason these tests aren't 
failing for solaris/sun-as and darwin like the fuse-caller-save-xmm test did, is 
because they test for the absence of specific cfi directives, which will always 
pass if no cfi directives are generated. So I've removed the cfi directive 
checks (and removed superfluous mains) and added tests on instructions.


Tested with -m32 and -m64.

OK for trunk?

Thanks,
- Tom

2014-07-21  Tom de Vries  t...@codesourcery.com

	PR target/61827
	* gcc.target/i386/fuse-caller-save-xmm.c: Allow LC0 without dot prefix
	for darwin in scan-assembler-times check.
	* gcc.target/i386/fuse-caller-save.c: Remove cfi-related
	scan-assembler-not checks.  Add checks for insns.
	(main): Remove.
	* gcc.target/i386/fuse-caller-save-rec.c: Remove cfi-related
	scan-assembler-not checks.  Copy checks from i386/fuse-caller-save.c.
	(main): Remove.

diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
index b30a0b4..d1441bc 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
@@ -18,14 +18,12 @@ foo (int y)
   return y + bar (y);
 }
 
-int
-main (void)
-{
-  return !(foo (5) == 13);
-}
+/* Check that no registers are saved/restored. */
+/* { dg-final { scan-assembler-not push  } } */
+/* { dg-final { scan-assembler-not pop  } } */
 
-/* Verify that no registers where saved on stack.  */
-/* { dg-final { scan-assembler-not \.cfi_offset  } } */
+/* Check that addition uses dx. */
+/* { dg-final { scan-assembler-times addl\t%\[re\]?dx, %\[re\]?ax 1 } } */
 
 /* Verify that bar is self-recursive.  */
 /* { dg-final { scan-assembler-times call\tbar 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
index c639936..4211a89 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-xmm.c
@@ -17,7 +17,7 @@ foo (v2df y)
 
 /* Check presence of all insns on xmm registers.  These checks are expected to
pass with both -fuse-caller-save and -fno-use-caller-save.  */
-/* { dg-final { scan-assembler-times addpd\t\\.LC0.*, %xmm0 1 } } */
+/* { dg-final { scan-assembler-times addpd\t\\.?LC0.*, %xmm0 1 } } */
 /* { dg-final { scan-assembler-times addpd\t%xmm1, %xmm0 1 } } */
 /* { dg-final { scan-assembler-times movapd\t%xmm0, %xmm1 1 } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
index 4ec4995..7e2b11d 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save.c
@@ -16,12 +16,9 @@ foo (int y)
   return y + bar (y);
 }
 
-int
-main (void)
-{
-  return !(foo (5) == 13);
-}
-
-/* { dg-final { scan-assembler-not \.cfi_def_cfa_offset  } } */
-/* { dg-final { scan-assembler-not \.cfi_offset  } } */
+/* Check that no registers are saved/restored. */
+/* { dg-final { scan-assembler-not push  } } */
+/* { dg-final { scan-assembler-not pop  } } */
 
+/* Check that addition uses dx. */
+/* { dg-final { scan-assembler-times addl\t%\[re\]?dx, %\[re\]?ax 1 } } */



Re: [PATCH, i386, PR61827] Fix fuse-caller-save-xmm.c test-case

2014-07-21 Thread Tom de Vries

On 21-07-14 12:40, Uros Bizjak wrote:

On Mon, Jul 21, 2014 at 12:34 PM, Tom de Vries tom_devr...@mentor.com wrote:


Dominique noticed that the .LC0 check failed on darwin, since the label LC0
is used. This follow-up patch fixes that (and I see now you already Ok-ed
this change).

Furthermore, I've realized from the comments in the PR that for
solaris/sun-as and darwin no cfi directives are generated. There are two
other i386 fuse-caller-save tests which test cfi directives. The reason
these tests aren't failing for solaris/sun-as and darwin like the
fuse-caller-save-xmm test did, is because they test for the absence of
specific cfi directives, which will always pass if no cfi directives are
generated. So I've removed the cfi directive checks (and removed superfluous
mains) and added tests on instructions.


Tested with -m32 and -m64.

OK for trunk?


This is OK.



Dominique noticed a symbol matching problem on darwin for 
fuse-caller-save-rec.c.

I've committed this followup patch that fixes that problem.

Thanks,
- Tom

2014-07-21  Tom de Vries  t...@codesourcery.com

	PR target/61827
	* gcc.target/i386/fuse-caller-save-rec.c: Allow underscore prefix to bar
	symbol in scan-assembler-times call check.

diff --git a/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
index d1441bc..a0fcf9c 100644
--- a/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
+++ b/gcc/testsuite/gcc.target/i386/fuse-caller-save-rec.c
@@ -26,5 +26,4 @@ foo (int y)
 /* { dg-final { scan-assembler-times addl\t%\[re\]?dx, %\[re\]?ax 1 } } */
 
 /* Verify that bar is self-recursive.  */
-/* { dg-final { scan-assembler-times call\tbar 2 } } */
-
+/* { dg-final { scan-assembler-times call\t_?bar 2 } } */


[PATCH][gomp4] Optimize expand_omp_for_static_chunk for chunk_size one

2014-07-28 Thread Tom de Vries
Jakub,

we're using expand_omp_for_static_chunk with a chunk_size of one to expand the
openacc loop construct.

This results in an inner and outer loop being generated, with the inner loop
having a trip count of one, which means that the inner loop can be simplified to
just the inner loop body. However, subsequent optimizations do not manage to do
this simplification.

This patch sets the loop exit condition to true if the chunk_size is one, to
ensure that the compiler will optimize away the inner loop.

OK for gomp4 branch?

Thanks,
- Tom
2014-07-25  Tom de Vries  t...@codesourcery.com

	* omp-low.c (expand_omp_for_static_chunk): Remove inner loop if
	chunk_size is one.

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b188e2d..5a73986 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -7345,9 +7345,14 @@ expand_omp_for_static_chunk (struct omp_region *region,
 	  stmt = gimple_build_assign (vback, t);
 	  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 
-	  t = build2 (fd->loop.cond_code, boolean_type_node,
-		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
-		      ? t : vback, e);
+	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
+	    t = build2 (EQ_EXPR, boolean_type_node,
+			build_int_cst (itype, 0),
+			build_int_cst (itype, 1));
+	  else
+	    t = build2 (fd->loop.cond_code, boolean_type_node,
+			DECL_P (vback) && TREE_ADDRESSABLE (vback)
+			? t : vback, e);
 	  gsi_insert_before (gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
 	}
 


[PATCH][gomp4] Fix bootstrap

2014-07-28 Thread Tom de Vries

Hi,

this patch removes some unused variables and fixes bootstrap of the 
gomp-4_0-branch.

Committed to gomp-4_0-branch as trivial.

Thanks,
- Tom
2014-07-28  Tom de Vries  t...@codesourcery.com

	* omp-low.c (process_reduction_data): Remove unused variables.

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b188e2d..927522c 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -9696,8 +9696,6 @@ process_reduction_data (gimple_seq *body, gimple_seq *in_stmt_seqp,
   switch (gimple_code (stmt))
 	{
 	case GIMPLE_OMP_FOR:
-	  tree clauses, nthreads, t;
-
 	  clauses = gimple_omp_for_clauses (stmt);
 
 	  /* Search for a reduction clause.  */


Fix gcc_assert in expand_omp_for_static_chunk

2014-11-12 Thread Tom de Vries

Jakub,

this patch fixes a gcc_assert in expand_omp_for_static_chunk.

The assert follows a loop with composite loop condition:
...
  vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
  ene = single_succ_edge (entry_bb);

  psi = gsi_start_phis (fin_bb);
  for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
       gsi_next (&psi), ++i)
...

AFAIU, the intention of the assert is that it tries to check that both:
- all phis have been handled (gsi_end_p (psi)), and
- all elements of head have been used (head->length () == i).
In other words, that we have stopped iterating because both loop conditions are 
false.


The current assert checks that *not* all phis have been handled:
...
  gcc_assert (!gsi_end_p (psi) && i == head->length ());
...

Looking back in the history, it seems we started out with the 'all phis handled' 
semantics, but I suspect that that got lost due to a typo:

...
79acaae1 2007-09-07
  gcc_assert (!phi && !args);

75a70cf95 2008-07-28
  gcc_assert (!gsi_end_p (psi) && i == VEC_length (edge_var_map, head));

f1f41a6c 2012-11-18
  gcc_assert (!gsi_end_p (psi) && i == head->length ());
...

Now, if the current assert is incorrect, why didn't it trigger?

The assert is in ssa-handling code in expand_omp_for_static_chunk. Ssa-handling 
code in omp-low.c is only triggered by pass_parallelize_loops, and that pass 
doesn't specify a chunk size on the GIMPLE_OMP_FOR it constructs, so that will 
only call expand_omp_for_static_nochunk.


I managed to trigger this assert in my oacc kernels directive patch set (on top 
of the gomp-4_0-branch), which constructs an oacc for loop in 
pass_parallelize_loops, and then this code in gomp-4_0-branch has the effect 
that we trigger expand_omp_for_static_chunk:

...
//TODO 


  /* For OpenACC loops, force a chunk size of one, as this avoids the default
 scheduling where several subsequent iterations are being executed by the
 same thread.  */
  if (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
{
  gcc_assert (fd->chunk_size == NULL_TREE);
  fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
}
...

So, AFAIU, this assert (and associated ssa-handling code in 
expand_omp_for_static_chunk) is dead on trunk, but I'm exercising the code 
currently in my patch series, so I'd prefer to fix it rather than remove it.


Bootstrapped and reg-tested on x86_64, on top of trunk, gomp-4_0-branch and 
internal oacc dev branch.


OK for trunk?

Thanks,
- Tom
2014-11-12  Tom de Vries  t...@codesourcery.com

	* omp-low.c (expand_omp_for_static_chunk): Fix assert.
---
 gcc/omp-low.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b59d069..5210de1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6775,7 +6775,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
 	  locus = redirect_edge_var_map_location (vm);
 	  add_phi_arg (nphi, redirect_edge_var_map_def (vm), re, locus);
 	}
-  gcc_assert (!gsi_end_p (psi)  i == head-length ());
+  gcc_assert (gsi_end_p (psi)  i == head-length ());
   redirect_edge_var_map_clear (re);
   while (1)
 	{
-- 
1.9.1



Run pass_expand_omp_ssa after pass_paralellize_loops

2014-11-12 Thread Tom de Vries

[ moved from gcc@ to gcc-patches@ ]
[ subject was: Re: [gomp4] openacc kernels directive support ]
On 30-09-14 15:37, Tom de Vries wrote:

I would be happily accepting splitting the current autopar pass
that way, that is, do

NEXT_PASS (pass_parallelize_loops)
PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops)
NEXT_PASS (pass_expand_omp)
POP_INSERT_PASSES ()

and make the analysis code handle lowered OMP form.



To explore that, I created a tentative patch on top of the gomp-4_0-branch,
which allows a non-bootstrap build and a gcc dg.exp run, so at least a couple of
parloops test-cases. I can put this through bootstrap and reg-test if you
confirm this patch is what you want.

I'm not sure though OACC and autopar can share the actual function split-off.
autopar is run rather late, later than the lto-stream point, while we need the
split-off done before that for oacc. I'm also not sure what the point would be
to have lowered OMP form in all those passes in between, I'd think you want to
omp-expand it asap.


Richard,

This patch implements your proposal. It uses pass_expand_omp after 
pass_parallelize_loops to expand the omp constructs inserted by 
pass_parallelize_loops.


Note: the patch doesn't remove omp_expand_local, since I'm still using that in 
my oacc kernels directive patch series.


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
2014-11-12  Tom de Vries  t...@codesourcery.com

	* function.h (struct function): Add omp_expand_needed field.
	* omp-low.c (pass_data pass_data_expand_omp_ssa): New pass_data.
	(class pass_expand_omp_ssa): New pass.
	(make_pass_expand_omp_ssa): New function.
	* passes.def (pass_parallelize_loops): Use PUSH_INSERT_PASSES_WITHIN
	instead of NEXT_PASS.
	(pass_expand_omp_ssa): Add after pass_parallelize_loops.
	* tree-parloops.c (gen_parallel_loop): Remove call to omp_expand_local.
	(pass_parallelize_loops::execute): Don't do cleanups TODO_cleanup_cfg
	and TODO_rebuild_alias yet.  Add TODO_update_ssa.  Set
	cfun->omp_expand_needed.
	* tree-pass.h (make_pass_expand_omp_ssa): Declare.
---
 gcc/function.h  |  3 +++
 gcc/omp-low.c   | 42 ++
 gcc/passes.def  |  3 +++
 gcc/tree-parloops.c | 18 +++---
 gcc/tree-pass.h |  1 +
 5 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/gcc/function.h b/gcc/function.h
index 3a6305c..1afce69 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -667,6 +667,9 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when an omp_expand is needed.  */
+  unsigned int omp_expand_needed : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 5210de1..b748ee1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -8832,6 +8832,48 @@ make_pass_expand_omp (gcc::context *ctxt)
 {
   return new pass_expand_omp (ctxt);
 }
+
+namespace {
+
+const pass_data pass_data_expand_omp_ssa =
+{
+  GIMPLE_PASS, /* type */
+  ompexpssa, /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg | PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
+};
+
+class pass_expand_omp_ssa : public gimple_opt_pass
+{
+public:
+  pass_expand_omp_ssa (gcc::context *ctxt)
+: gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return (bool)cfun-omp_expand_needed; }
+
+  virtual unsigned int execute (function *)
+{
+  unsigned res = execute_expand_omp ();
+  cfun-omp_expand_needed = 0;
+  return res;
+}
+
+}; // class pass_expand_omp_ssa
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_expand_omp_ssa (gcc::context *ctxt)
+{
+  return new pass_expand_omp_ssa (ctxt);
+}
 
 /* Routines to lower OpenMP directives into OMP-GIMPLE.  */
 
diff --git a/gcc/passes.def b/gcc/passes.def
index 2305d67..dd91718 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -241,6 +241,9 @@ along with GCC; see the file COPYING3.  If not see
 	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_iv_canon);
 	  NEXT_PASS (pass_parallelize_loops);
+	  PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops)
+	  NEXT_PASS (pass_expand_omp_ssa);
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_if_conversion);
 	  /* pass_vectorize must immediately follow pass_if_conversion.
 	 Please do not add any other passes in between.  */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 09b3f16..c5e3041 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1753,7 +1753,6 @@ gen_parallel_loop (struct loop *loop,
   tree many_iterations_cond, type, nit;
   tree arg_struct, new_arg_struct;
   gimple_seq stmts;
-  basic_block parallel_head;
   edge entry, exit;
   struct clsn_data clsn_data;
   unsigned prob;
@@ -1891,8 +1890,8

Re: Run pass_expand_omp_ssa after pass_paralellize_loops

2014-11-12 Thread Tom de Vries

On 12-11-14 15:17, Richard Biener wrote:

On Wed, 12 Nov 2014, Tom de Vries wrote:


[ moved from gcc@ to gcc-patches@ ]
[ subject was: Re: [gomp4] openacc kernels directive support ]
On 30-09-14 15:37, Tom de Vries wrote:

I would be happily accepting splitting the current autopar pass
that way, that is, do

NEXT_PASS (pass_parallelize_loops)
PUSH_INSERT_PASSES_WITHIN (pass_parallelize_loops)
NEXT_PASS (pass_expand_omp)
POP_INSERT_PASSES ()

and make the analysis code handle lowered OMP form.



To explore that, I created a tentative patch on top of the gomp-4_0-branch,
which allows a non-bootstrap build and a gcc dg.exp run, so at least a
couple of
parloops test-cases. I can put this through bootstrap and reg-test if you
confirm this patch is what you want.

I'm not sure though OACC and autopar can share the actual function
split-off.
autopar is run rather late, later than the lto-stream point, while we need
the
split-off done before that for oacc. I'm also not sure what the point would
be
to have lowered OMP form in all those passes in between, I'd think you want
to
omp-expand it asap.


Richard,

This patch implements your proposal. It uses pass_expand_omp after
pass_parallelize_loops to expand the omp constructs inserted by
pass_parallelize_loops.

Note: the patch doesn't remove omp_expand_local, since I'm still using that in
my oacc kernels directive patch series.

Bootstrapped and reg-tested on x86_64.

OK for trunk?


Hmm, we have used properties to communicate this kind of lowering
need in the past.  So I would prefer you introduce

#define PROP_gimple_eomp (1 << 13)  /* no OpenMP directives */

provide that property by the omp expansion pass, clear it from
parloops and gate the omp expand pass if the property is already set.

Look at how PROP_gimple_lcx is handled.



Richard,

I've followed up on your (and David's indentation) comment.

The patch now defines a property PROP_gimple_eomp, and uses it to communicate 
the need for expansion of omp constructs between passes.


Bootstrapped and regtested on x86_64.

OK for trunk?

Thanks,
- Tom

2014-11-12  Tom de Vries  t...@codesourcery.com

	* omp-low.c (pass_data_expand_omp): Set properties_provided to
	PROP_gimple_eomp.
	(pass_expand_omp::gate): Remove function.  Move gate expression to ...
	(pass_expand_omp::execute): ... here, as new variable gate.  Add early
	exit if gate is false.
	(pass_data pass_data_expand_omp_ssa): New pass_data.
	(class pass_expand_omp_ssa): New pass.
	(make_pass_expand_omp_ssa): New function.
	* passes.def (pass_parallelize_loops): Use PUSH_INSERT_PASSES_WITHIN
	instead of NEXT_PASS.
	(pass_expand_omp_ssa): Add after pass_parallelize_loops.
	* tree-parloops.c (gen_parallel_loop): Remove call to omp_expand_local.
	(pass_parallelize_loops::execute): Don't do cleanups TODO_cleanup_cfg
	and TODO_rebuild_alias yet.  Add TODO_update_ssa.  Set
	cfun->omp_expand_needed.
	* tree-pass.h: Add define PROP_gimple_eomp.
	(make_pass_expand_omp_ssa): Declare.
---
 gcc/omp-low.c   | 56 +++--
 gcc/passes.def  |  3 +++
 gcc/tree-parloops.c | 18 +++--
 gcc/tree-pass.h |  2 ++
 4 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 5210de1..3afc138 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -8801,7 +8801,7 @@ const pass_data pass_data_expand_omp =
   OPTGROUP_NONE, /* optinfo_flags */
   TV_NONE, /* tv_id */
   PROP_gimple_any, /* properties_required */
-  0, /* properties_provided */
+  PROP_gimple_eomp, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
   0, /* todo_flags_finish */
@@ -8815,13 +8815,18 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *)
+  virtual unsigned int execute (function *)
 {
-  return ((flag_openmp != 0 || flag_openmp_simd != 0
-	   || flag_cilkplus != 0)  !seen_error ());
-}
+  bool gate = ((flag_openmp != 0 || flag_openmp_simd != 0
+		|| flag_cilkplus != 0)  !seen_error ());
 
-  virtual unsigned int execute (function *) { return execute_expand_omp (); }
+  /* This pass always runs, to provide PROP_gimple_eomp.
+	 But there is nothing to do unless -fopenmp is given.  */
+  if (!gate)
+	return 0;
+
+  return execute_expand_omp ();
+}
 
 }; // class pass_expand_omp
 
@@ -8832,6 +8837,45 @@ make_pass_expand_omp (gcc::context *ctxt)
 {
   return new pass_expand_omp (ctxt);
 }
+
+namespace {
+
+const pass_data pass_data_expand_omp_ssa =
+{
+  GIMPLE_PASS, /* type */
+  ompexpssa, /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg | PROP_ssa, /* properties_required */
+  PROP_gimple_eomp, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
+};
+
+class pass_expand_omp_ssa : public gimple_opt_pass
+{
+public:
+  pass_expand_omp_ssa (gcc::context *ctxt

[PATCH] Fix patch mangling with --inline option in mklog

2014-11-14 Thread Tom de Vries

Diego,

I noticed that a patch processed with mklog --inline got mangled.

In mklog, first we read the .diff file into array diff_lines.  Then, in the case 
of --inline, at the end we expect diff_lines still to contain the .diff file. 
That's not the case however, and that causes the mangling.


The patch fixes this by copying the diff_lines before processing, and using the 
copy at the end to reproduce the .diff file.


Committed as obvious.

Thanks,
- Tom
2014-11-14  Tom de Vries  t...@codesourcery.com

	* mklog: Move reading of .diff file up and add comment.  Copy diff_lines
	to orig_diff_lines.  Use orig_diff_lines when appending patch.
---
 contrib/mklog | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/contrib/mklog b/contrib/mklog
index 8412d38..840f6f8 100755
--- a/contrib/mklog
+++ b/contrib/mklog
@@ -132,15 +132,23 @@ sub is_top_level {
 	return $function  $function !~ /^[\s{]/;
 }
 
+# Read contents of .diff file
+open (DFILE, $diff) or die Could not open file $diff for reading;
+chomp (my @diff_lines = DFILE);
+close (DFILE);
+
+# Array diff_lines is modified by the log generation, so save a copy in
+# orig_diff_lines if needed.
+if ($inline) {
+@orig_diff_lines = @diff_lines;
+}
+
 # For every file in the .diff print all the function names in ChangeLog
 # format.
 %cl_entries = ();
 $change_msg = undef;
 $look_for_funs = 0;
 $clname = get_clname('');
-open (DFILE, $diff) or die Could not open file $diff for reading;
-chomp (my @diff_lines = DFILE);
-close (DFILE);
 $line_idx = 0;
 foreach (@diff_lines) {
 # Stop processing functions if we found a new file.
@@ -313,7 +321,7 @@ foreach my $clname (keys %cl_entries) {
 
 if ($inline) {
 	# Append the patch to the log
-	foreach (@diff_lines) {
+	foreach (@orig_diff_lines) {
 		print OUTPUTFILE $_\n;
 	}
 }
-- 
1.9.1



[PATCH, gomp4, committed] Fix libgomp/plugin/../configure.tgt unary operator expected error

2014-11-14 Thread Tom de Vries

Thomas,

This patch fixes this error while building the gomp-4_0-branch:
...
ref-gomp-4_0-branch-14-11-13/src/libgomp/plugin/../configure.tgt: line 30: test: 
=: unary operator expected

...

The error occurs in this piece of code:
...
# Check for futex enabled all at once.
if test $enable_linux_futex = yes; then
...

The patch makes sure we can handle the case that enable_linux_futex is empty.

Committed as obvious.

Thanks,
- Tom
2014-11-14  Tom de Vries  t...@codesourcery.com

	* configure.tgt: Handle case that $enable_linux_futex is empty.
---
 libgomp/ChangeLog.gomp | 4 
 libgomp/configure.tgt  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp
index 492393b..abceb62 100644
--- a/libgomp/ChangeLog.gomp
+++ b/libgomp/ChangeLog.gomp
@@ -1,3 +1,7 @@
+2014-11-14  Tom de Vries  t...@codesourcery.com
+
+	* configure.tgt: Handle case that $enable_linux_futex is empty.
+
 2014-11-14  Thomas Schwinge  tho...@codesourcery.com
 
 	* libgomp.texi (Reporting Bugs): Mention the openacc Bugzilla
diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index 8b18417..d3511d7 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -27,7 +27,7 @@ fi
 config_path=posix
 
 # Check for futex enabled all at once.
-if test $enable_linux_futex = yes; then
+if test x$enable_linux_futex = xyes; then
   case ${target} in
 
 aarch64*-*-linux*)
-- 
1.9.1



[PATCH, gomp4, committed] Fix Can't rename module file openacc_internal.mod0 error

2014-11-14 Thread Tom de Vries

Thomas,

while bootstrapping the gomp-4_0-branch, I ran into this error:
...
libtool: compile: build/./gcc/gfortran -Bbuild/./gcc/ 
-Binstall/x86_64-unknown-linux-gnu/bin/ -Binstall/x86_64-unknown-linux-gnu/lib/ 
-isystem install/x86_64-unknown-linux-gnu/include -isystem 
install/x86_64-unknown-linux-gnu/sys-include -m32 -L. -Wall -L../libgfortran 
-m32 -c src/libgomp/openacc.f90 -fPIC -o .libs/openacc.o
build/./gcc/gfortran -Bbuild/./gcc/ -Binstall/x86_64-unknown-linux-gnu/bin/ 
-Binstall/x86_64-unknown-linux-gnu/lib/ -isystem 
install/x86_64-unknown-linux-gnu/include -isystem 
install/x86_64-unknown-linux-gnu/sys-include  -m32 -L. -Wall -L../libgfortran 
-m32 -fsyntax-only src/libgomp/openacc.f90
Fatal Error: Can't rename module file 'openacc_internal.mod0' to 
'openacc_internal.mod': No such file or directory

make[9]: *** [openacc.mod] Error 1
make[9]: *** Waiting for unfinished jobs
src/libgomp/openacc.f90:621.6:

  use openacc_internal, only: acc_get_num_devices_l
  1
Fatal Error: Can't open module file 'openacc_internal.mod' for reading at (1): 
No such file or directory

make[9]: *** [openacc.lo] Error 1
...

AFAIU, we compile fopenacc.f90 twice in the same working directory, once using 
this rule to obtain openacc.mod:

...
%.mod: %.f90
$(FC) $(FCFLAGS) -fsyntax-only $<
...
And once more to obtain openacc.lo.

AFAIU, given the absence of -J, both will use temporary file fopenacc.mod0, 
which explains the error.


If we first build fopenacc.mod, we'll produce openacc.mod file, which is then 
overwritten by building fopenacc.lo. If we first build fopenacc.lo, we already 
have fopenacc.mod, and we'll not build another openacc.mod. So in both cases, we 
use the fopenacc.mod produced by building openacc.lo.


It's only when we try to build both simultaneously that we can run into this 
error.

And, we don't need to build both, since building fopenacc.lo already produces 
fopenacc.mod.


This patch tells make that building fopenacc.lo produces fopenacc.mod, and that 
fixes the error.


Committed as obvious.

Thanks,
- Tom
2014-11-14  Tom de Vries  t...@codesourcery.com

	* Makefile.am: Add missing dependency openacc.mod: openacc.lo.
	* Makefile.in: Regenerate.
---
 libgomp/ChangeLog.gomp | 5 +
 libgomp/Makefile.am| 2 ++
 libgomp/Makefile.in| 2 ++
 3 files changed, 9 insertions(+)

diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp
index abceb62..b2c2526 100644
--- a/libgomp/ChangeLog.gomp
+++ b/libgomp/ChangeLog.gomp
@@ -1,5 +1,10 @@
 2014-11-14  Tom de Vries  t...@codesourcery.com
 
+	* Makefile.am: Add missing dependency openacc.mod: openacc.lo.
+	* Makefile.in: Regenerate.
+
+2014-11-14  Tom de Vries  t...@codesourcery.com
+
 	* configure.tgt: Handle case that $enable_linux_futex is empty.
 
 2014-11-14  Thomas Schwinge  tho...@codesourcery.com
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index f265c5d..e5411ff 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -85,6 +85,8 @@ omp_lib_kinds.mod: omp_lib.mod
 	:
 openacc_kinds.mod: openacc.mod
 	:
+openacc.mod: openacc.lo
+	:
 %.mod: %.f90
 	$(FC) $(FCFLAGS) -fsyntax-only $
 fortran.lo: libgomp_f.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index ea3e1ca..9a46373 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -1122,6 +1122,8 @@ omp_lib_kinds.mod: omp_lib.mod
 	:
 openacc_kinds.mod: openacc.mod
 	:
+openacc.mod: openacc.lo
+	:
 %.mod: %.f90
 	$(FC) $(FCFLAGS) -fsyntax-only $
 fortran.lo: libgomp_f.h
-- 
1.9.1



openacc kernels directive -- initial support

2014-11-15 Thread Tom de Vries

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of 
loops in the oacc kernels region.


The patch series consists of these 8 patches:
...
1  Expand oacc kernels after pass_build_ealias
2  Add pass_oacc_kernels
3  Add pass_ch_oacc_kernels to pass_oacc_kernels
4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
5  Add pass_loop_im to pass_oacc_kernels
6  Add pass_ccp to pass_oacc_kernels
7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
8  Do simple omp lowering for no address taken var
...

The patch series does not yet apply cleanly to trunk, since it's dependent on 
the oacc middle end changes present in the gomp-4_0-branch, already submitted by 
Thomas for trunk.


Furthermore, it's dependent on an assert fix submitted for trunk ('Fix 
gcc_assert in expand_omp_for_static_chunk' @ 
https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01149.html ).


The patch series is intended for trunk, but - given the dependency on the oacc 
middle end changes - has been bootstrapped for x86_64 on top of gomp-4_0-branch.


I'll post the patch series in reply to this email.

Thanks,
- Tom

[ FTR  In order to get clean libgomp and goacc test results in gomp-4_0-branch, 
to have a good basis for testing, I used the following patch set:


 Don't allow flto-partition=balance for fopenacc
   Unsubmitted. This works around a compilation problem for
   libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c that I ran into on
   our internal dev branch.  I'll investigate whether I can reproduce with
   gomp-4_0-branch asap.

 Mark fopenacc as LTO option
   @ https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00085.html   

 Only use nvidia accelerator if present
   @ https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00247.html

 Set default LIBGOMP_PLUGIN_PATH
   @ https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00242.html
]


[PATCH, 1/8] Expand oacc kernels after pass_build_ealias

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch moves omp expansion of the oacc kernels directive to after 
pass_build_ealias.


The rationale is that in order to use pass_parallelize_loops for analysis and 
transformation of an oacc kernels region, we postpone omp expansion of that 
region until the earliest point in the pass list where enough information is 
available to run pass_parallelize_loops, in other words, after pass_build_ealias.


The patch postpones expansion in expand_omp, and ensures expansion by adding 
pass_expand_omp_ssa:

- after pass_build_ealias, and
- after pass_all_early_optimizations for the case we're not optimizing.

In order to make sure the oacc kernels region arrives at pass_expand_omp_ssa, 
the way it left expand_omp, the patch makes pass_ccp and pass_forwprop aware of 
lowered omp code, to handle it conservatively.


The patch contains changes in expand_omp_target to deal with ssa-code, similar 
to what is already present in expand_omp_taskreg.


Furthermore, the patch forces the .omp_data_sizes and .omp_data_kinds to not be 
static for oacc kernels. It does this to get some references to .omp_data_sizes 
and .omp_data_kinds in the ssa code.  Without these references, the definitions 
will be removed. The reference of the variables in GIMPLE_OACC_KERNELS is not 
enough to have them not removed. [ In vries/oacc-kernels, I used a BUILT_IN_USE 
kludge for this purpose ].


Finally, at the end of pass_expand_omp_ssa we're left with SSA_NAMEs in the 
original function of which the definition has been removed (as in moved to the 
split off function). TODO_remove_unused_locals takes care of some of them, but 
not the anonymous ones. So the patch iterates over all SSA_NAMEs to find these 
dangling SSA_NAMEs and releases them.


OK for trunk?

Thanks,
- Tom
2014-11-14  Tom de Vries  t...@codesourcery.com

	* function.h (struct function): Add contains_oacc_kernels field.
	* gimplify.c (gimplify_omp_workshare): Set contains_oacc_kernels.
	* omp-low.c: Include gimple-pretty-print.h.
	(release_first_vuse_in_edge_dest): New function.
	(expand_omp_target): Handle ssa-code.
	(expand_omp): Don't expand GIMPLE_OACC_KERNELS when not in ssa.
	(pass_data_expand_omp): Don't set PROP_gimple_eomp unconditionally in
	properties_provided field.
	(pass_expand_omp::execute): Set PROP_gimple_eomp in
	cfun->curr_properties only if cfun does not contain oacc kernels.
	(pass_data_expand_omp_ssa): Add TODO_remove_unused_locals to
	todo_flags_finish field.
	(pass_expand_omp_ssa::execute): Release dangling SSA_NAMEs after calling
	execute_expand_omp.
	(lower_omp_target): Add static_arrays variable, init to 1.  Don't use
	static arrays for kernels directive.  Use static_arrays variable.
	Handle case that .omp_data_kinds is not static.
	(gimple_stmt_omp_lowering_p): New function.
	* omp-low.h (gimple_stmt_omp_lowering_p): Declare.
	* passes.def: Add pass_expand_omp_ssa after pass_build_ealias.
	* tree-ssa-ccp.c: Include omp-low.h.
	(surely_varying_stmt_p): Handle omp lowering code conservatively.
	* tree-ssa-forwprop.c: Include omp-low.h.
	(pass_forwprop::execute): Handle omp lowering code conservatively.
---
 gcc/function.h  |   3 +
 gcc/gimplify.c  |   1 +
 gcc/omp-low.c   | 194 +---
 gcc/omp-low.h   |   1 +
 gcc/passes.def  |   2 +
 gcc/tree-ssa-ccp.c  |   4 +
 gcc/tree-ssa-forwprop.c |   4 +-
 7 files changed, 196 insertions(+), 13 deletions(-)

diff --git a/gcc/function.h b/gcc/function.h
index 08ab761..a72c154 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -664,6 +664,9 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when the function contains oacc kernels directives.  */
+  unsigned int contains_oacc_kernels : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 2c8c666..52d7e6d 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7281,6 +7281,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
   break;
 case OACC_KERNELS:
   stmt = gimple_build_oacc_kernels (body, OACC_KERNELS_CLAUSES (expr));
+  cfun-contains_oacc_kernels = 1;
   break;
 case OACC_PARALLEL:
   stmt

[PATCH, 2/8] Add pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds a pass group pass_oacc_kernels.

The rationale is that we want a pass group to run oacc kernels region related 
(optimization) passes in.


OK for trunk?

Thanks,
- Tom

2014-11-14  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass group pass_oacc_kernels.
	* tree-pass.h (make_pass_oacc_kernels): Declare.
	* tree-ssa-loop.c (gate_oacc_kernels): New static function.
	(pass_data_oacc_kernels): New pass_data.
	(class pass_oacc_kernels): New pass.
	(make_pass_oacc_kernels): New function.
---
 gcc/passes.def  |  5 +
 gcc/tree-pass.h |  1 +
 gcc/tree-ssa-loop.c | 48 
 3 files changed, 54 insertions(+)

diff --git a/gcc/passes.def b/gcc/passes.def
index bce8591..1fdb70a 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -72,6 +72,11 @@ along with GCC; see the file COPYING3.  If not see
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	 execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
+	  /* Pass group that runs when there are oacc kernels in the
+	 function.  */
+	  NEXT_PASS (pass_oacc_kernels);
+	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_merge_phi);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index eaa69b4..0bae847 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -445,6 +445,7 @@ extern gimple_opt_pass *make_pass_strength_reduction (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_vtable_verify (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ubsan (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_sanopt (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt);
 
 /* IPA Passes */
 extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 758b5fc..c29aa22 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -157,6 +157,54 @@ make_pass_tree_loop (gcc::context *ctxt)
   return new pass_tree_loop (ctxt);
 }
 
+/* Gate for oacc kernels pass group.  */
+
+static bool
+gate_oacc_kernels (function *fn)
+{
+  if (!flag_openacc)
+return false;
+
+  return fn-contains_oacc_kernels;
+}
+
+/* The oacc kernels superpass.  */
+
+namespace {
+
+const pass_data pass_data_oacc_kernels =
+{
+  GIMPLE_PASS, /* type */
+  oacc_kernels, /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_LOOP, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_oacc_kernels : public gimple_opt_pass
+{
+public:
+  pass_oacc_kernels (gcc::context *ctxt)
+: gimple_opt_pass (pass_data_oacc_kernels, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *fn) { return gate_oacc_kernels (fn); }
+
+}; // class pass_oacc_kernels
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_oacc_kernels (gcc::context *ctxt)
+{
+  return new pass_oacc_kernels (ctxt);
+}
+
 /* The no-loop superpass.  */
 
 namespace {
-- 
1.9.1







[PATCH, 3/8] Add pass_ch_oacc_kernels to pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds a pass_ch_oacc_kernels to the pass group pass_oacc_kernels.

The idea is that pass_parallelize_loops only deals with loops for which the 
header has been copied, so the easiest way to meet that requirement when running 
pass_parallelize_loops in group pass_oacc_kernels, is to run pass_ch as a part 
of pass_oacc_kernels.


We define a separate pass pass_ch_oacc_kernels, to leave all loops that aren't 
part of a kernels region alone.


OK for trunk?

Thanks,
- Tom

2014-11-14  Tom de Vries  t...@codesourcery.com

	* omp-low.c (loop_in_oacc_kernels_region_p): New function.
	* omp-low.h (loop_in_oacc_kernels_region_p): Declare.
	* passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
	* tree-pass.h (make_pass_ch_oacc_kernels): Declare
	* tree-ssa-loop-ch.c: Include omp-low.h.
	(pass_ch_execute): Declare.
	(pass_ch::execute): Factor out ...
	(pass_ch_execute): ... this new function.  If handling oacc kernels,
	skip loops that are not in oacc kernels region.
	(pass_ch_oacc_kernels::execute):
	(pass_data_ch_oacc_kernels): New pass_data.
	(class pass_ch_oacc_kernels): New pass.
	(pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New
	function.
---
 gcc/omp-low.c  | 83 ++
 gcc/omp-low.h  |  2 ++
 gcc/passes.def |  1 +
 gcc/tree-pass.h|  1 +
 gcc/tree-ssa-loop-ch.c | 59 +--
 5 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6caeae9..e35fa8b 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13909,4 +13909,87 @@ gimple_stmt_omp_lowering_p (gimple stmt)
   return false;
 }
 
+/* Return true if LOOP is inside a kernels region.  */
+
+bool
+loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
+			   basic_block *region_exit)
+{
+  bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap region_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap_clear (region_bitmap);
+
+  if (region_entry != NULL)
+*region_entry = NULL;
+  if (region_exit != NULL)
+*region_exit = NULL;
+
+  basic_block bb;
+  gimple last;
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  if (bitmap_bit_p (region_bitmap, bb-index))
+	continue;
+
+  last = last_stmt (bb);
+  if (!last)
+	continue;
+
+  if (gimple_code (last) != GIMPLE_OACC_KERNELS)
+	continue;
+
+  bitmap_clear (excludes_bitmap);
+  bitmap_set_bit (excludes_bitmap, bb-index);
+
+  vecbasic_block dominated
+	= get_all_dominated_blocks (CDI_DOMINATORS, bb);
+
+  unsigned di;
+  basic_block dom;
+
+  basic_block end_region = NULL;
+  FOR_EACH_VEC_ELT (dominated, di, dom)
+	{
+	  if (dom == bb)
+	continue;
+
+	  last = last_stmt (dom);
+	  if (!last)
+	continue;
+
+	  if (gimple_code (last) != GIMPLE_OMP_RETURN)
+	continue;
+
+	  if (end_region == NULL
+	  || dominated_by_p (CDI_DOMINATORS, end_region, dom))
+	end_region = dom;
+	}
+
+  vecbasic_block excludes
+	= get_all_dominated_blocks (CDI_DOMINATORS, end_region);
+
+  unsigned di2;
+  basic_block exclude;
+
+  FOR_EACH_VEC_ELT (excludes, di2, exclude)
+	if (exclude != end_region)
+	  bitmap_set_bit (excludes_bitmap, exclude-index);
+
+  FOR_EACH_VEC_ELT (dominated, di, dom)
+	if (!bitmap_bit_p (excludes_bitmap, dom-index))
+	  bitmap_set_bit (region_bitmap, dom-index);
+
+  if (bitmap_bit_p (region_bitmap, loop-header-index))
+	{
+	  if (region_entry != NULL)
+	*region_entry = bb;
+	  if (region_exit != NULL)
+	*region_exit = end_region;
+	  return true;
+	}
+}
+
+  return false;
+}
+
 #include gt-omp-low.h
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index ff8a956..f1b9d77 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -29,6 +29,8 @@ extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
 extern bool gimple_stmt_omp_lowering_p (gimple);
+extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *,
+	   basic_block *);
 
 extern GTY(()) vectree, va_gc *offload_funcs;
 extern GTY(()) vectree, va_gc *offload_vars;
diff --git a/gcc/passes.def b/gcc/passes.def
index 1fdb70a..5eefe73 100644
--- a/gcc/passes.def
+++ b/gcc

[PATCH, 4/8] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_tree_loop_init and pass_tree_loop_done to
pass_oacc_kernels.


Pass_parallelize_loops is run between these passes in the pass group 
pass_tree_loop, since it requires loop information.  We do the same for 
pass_oacc_kernels.


OK for trunk?

Thanks,
- Tom

2014-11-14  Tom de Vries  t...@codesourcery.com

	* passes.def: Run pass_tree_loop_init and pass_tree_loop_done in pass
	group pass_oacc_kernels.
	* tree-ssa-loop.c (pass_tree_loop_init::clone)
	(pass_tree_loop_done::clone): New function.
---
 gcc/passes.def  | 2 ++
 gcc/tree-ssa-loop.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/gcc/passes.def b/gcc/passes.def
index 5eefe73..83f437b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -77,6 +77,8 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	  NEXT_PASS (pass_ch_oacc_kernels);
+	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_fre);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index c29aa22..c78b013 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -269,6 +269,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *);
+  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
 
 }; // class pass_tree_loop_init
 
@@ -563,6 +564,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
 
 }; // class pass_tree_loop_done
 
-- 
1.9.1







[PATCH, 6/8] Add pass_ccp to pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_ccp to pass group pass_oacc_kernels.

We need this pass to simplify the loop body, and allow pass_parloops to detect 
that loop iterations are independent.


OK for trunk?

Thanks,
- Tom

2014-11-14  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_ccp in pass group pass_oacc_kernels.

	* gcc.dg/pr43513.c: Update for new pass_ccp.
	* gcc.dg/tree-ssa/alias-17.c: Same.
	* gcc.dg/tree-ssa/foldconst-4.c: Same.
	* gcc.dg/tree-ssa/ssa-ccp-29.c: Same.
	* gcc.dg/tree-ssa/ssa-ccp-3.c: Same.
---
 gcc/passes.def  | 1 +
 gcc/testsuite/gcc.dg/pr43513.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/alias-17.c| 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/foldconst-4.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-29.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-3.c   | 6 +++---
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index f6c16b9..cd9443c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_ch_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
+	  NEXT_PASS (pass_ccp);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_expand_omp_ssa);
diff --git a/gcc/testsuite/gcc.dg/pr43513.c b/gcc/testsuite/gcc.dg/pr43513.c
index 78a037b..3fb0890 100644
--- a/gcc/testsuite/gcc.dg/pr43513.c
+++ b/gcc/testsuite/gcc.dg/pr43513.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-ccp2 } */
+/* { dg-options -O2 -fdump-tree-ccp3 } */
 
 void bar (int *);
 void foo (char *, int);
@@ -15,5 +15,5 @@ foo3 ()
 foo (%d , results[i]);
 }
 
-/* { dg-final { scan-tree-dump-times alloca 0 ccp2} } */
-/* { dg-final { cleanup-tree-dump ccp2 } } */
+/* { dg-final { scan-tree-dump-times alloca 0 ccp3} } */
+/* { dg-final { cleanup-tree-dump ccp3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-17.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-17.c
index 48e72ff..59862f6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/alias-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O -fno-early-inlining -fdump-tree-ccp2 } */
+/* { dg-options -O -fno-early-inlining -fdump-tree-ccp3 } */
 
 int *p;
 int inline bar(void) { return 0; }
@@ -14,5 +14,5 @@ int foo(int x)
   return *q + *p;
 }
 
-/* { dg-final { scan-tree-dump-not NOTE: no flow-sensitive alias info for ccp2 } } */
-/* { dg-final { cleanup-tree-dump ccp2 } } */
+/* { dg-final { scan-tree-dump-not NOTE: no flow-sensitive alias info for ccp3 } } */
+/* { dg-final { cleanup-tree-dump ccp3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/foldconst-4.c b/gcc/testsuite/gcc.dg/tree-ssa/foldconst-4.c
index 445d415..916a857 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/foldconst-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/foldconst-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O -fdump-tree-ccp2 } */
+/* { dg-options -O -fdump-tree-ccp3 } */
 
 struct a {int a,b;};
 const static struct a a;
@@ -10,5 +10,5 @@ test()
 {
   return a.a+b[c];
 }
-/* { dg-final { scan-tree-dump return 0; ccp2 } } */
-/* { dg-final { cleanup-tree-dump ccp2 } } */
+/* { dg-final { scan-tree-dump return 0; ccp3 } } */
+/* { dg-final { cleanup-tree-dump ccp3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-29.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-29.c
index 44d2945..1e3f41b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-29.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-29.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O -fdump-tree-ccp2 } */
+/* { dg-options -O -fdump-tree-ccp3 } */
 
 static double num;
 int foo (void)
@@ -7,5 +7,5 @@ int foo (void)
   return *(unsigned *)num;
 }
 
-/* { dg-final { scan-tree-dump return 0; ccp2 } } */
-/* { dg-final { cleanup-tree-dump ccp2 } } */
+/* { dg-final { scan-tree-dump return 0; ccp3 } } */
+/* { dg-final { cleanup-tree-dump ccp3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-3.c
index 86a706b..03717e1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-3.c
@@ -1,5 +1,5 @@
 /* { dg-do

[PATCH, 5/8] Add pass_loop_im to pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_loop_im to pass group pass_oacc_kernels.

We need this pass to simplify the loop body, and allow pass_parloops to detect 
that loop iterations are independent.


OK for trunk?

Thanks,
- Tom


2014-11-14  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_lim in pass group pass_oacc_kernels.

	* c-c++-common/restrict-2.c: Update for new pass_lim.
	* c-c++-common/restrict-4.c: Same.
	* g++.dg/tree-ssa/pr33615.C:  Same.
	* g++.dg/tree-ssa/restrict1.C: Same.
	* gcc.dg/tm/pub-safety-1.c:  Same.
	* gcc.dg/tm/reg-promotion.c:  Same.
	* gcc.dg/tree-ssa/20050314-1.c:  Same.
	* gcc.dg/tree-ssa/loop-32.c: Same.
	* gcc.dg/tree-ssa/loop-33.c: Same.
	* gcc.dg/tree-ssa/loop-34.c: Same.
	* gcc.dg/tree-ssa/loop-35.c: Same.
	* gcc.dg/tree-ssa/loop-7.c: Same.
	* gcc.dg/tree-ssa/pr23109.c: Same.
	* gcc.dg/tree-ssa/restrict-3.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-1.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-10.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-11.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-12.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-2.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-3.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-6.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-7.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-8.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-9.c: Same.
	* gcc.dg/tree-ssa/structopt-1.c: Same.
	* gfortran.dg/pr32921.f: Same.
---
 gcc/passes.def  | 1 +
 gcc/testsuite/c-c++-common/restrict-2.c | 6 +++---
 gcc/testsuite/c-c++-common/restrict-4.c | 6 +++---
 gcc/testsuite/g++.dg/tree-ssa/pr33615.C | 6 +++---
 gcc/testsuite/g++.dg/tree-ssa/restrict1.C   | 6 +++---
 gcc/testsuite/gcc.dg/tm/pub-safety-1.c  | 6 +++---
 gcc/testsuite/gcc.dg/tm/reg-promotion.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-32.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-33.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-34.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-35.c | 8 
 gcc/testsuite/gcc.dg/tree-ssa/loop-7.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/pr23109.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-3.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-1.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-10.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-11.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-12.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-2.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-3.c   | 8 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-6.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-7.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-8.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-9.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/structopt-1.c | 6 +++---
 gcc/testsuite/gfortran.dg/pr32921.f | 6 +++---
 27 files changed, 81 insertions(+), 80 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 83f437b..f6c16b9 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -78,6 +78,7 @@ along with GCC; see the file COPYING3.  If not see
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	  NEXT_PASS (pass_ch_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_expand_omp_ssa);
diff --git a/gcc/testsuite/c-c++-common/restrict-2.c b/gcc/testsuite/c-c++-common/restrict-2.c
index 3f71b77..f0b0e15a 100644
--- a/gcc/testsuite/c-c++-common/restrict-2.c
+++ b/gcc/testsuite/c-c++-common/restrict-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O -fno-strict-aliasing -fdump-tree-lim1-details } */
+/* { dg-options -O -fno-strict-aliasing -fdump-tree-lim2-details } */
 
 void foo (float * __restrict__ a, float * __restrict__ b, int n, int j)
 {
@@ -10,5 +10,5 @@ void foo (float * __restrict__ a, float * __restrict__ b, int n, int j)
 
 /* We should move the RHS of the store out of the loop.  */
 
-/* { dg-final { scan-tree-dump-times Moving statement 11 lim1 } } */
-/* { dg-final { cleanup-tree-dump lim1 } } */
+/* { dg-final { scan-tree-dump-times Moving statement 11 lim2 } } */
+/* { dg-final { cleanup-tree-dump lim2 } } */
diff --git a/gcc/testsuite/c-c++-common/restrict-4.c b/gcc/testsuite/c-c++-common/restrict-4.c

[PATCH, 7/8] Add pass_parloops_oacc_kernels to pass_oacc_kernels

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds:
- a specialized version of pass_parallelize_loops called
pass_parloops_oacc_kernels to pass group pass_oacc_kernels, and
- relevant test-cases.

The pass only handles loops that are in a kernels region, and skips over bits of 
pass_parallelize_loops that are already done for oacc kernels.


The pass reintroduces the use of omp_expand_local, because I haven't yet managed
to make it work using the external pass pass_expand_omp_ssa.


An obvious limitation of the patch is the fact that we copy over the clauses 
from the kernels directive to the generated parallel directive. We'll need to do 
something more intelligent here, f.i. setting vector_length based on the 
parallelization factor.


Another limitation is that the pass still needs -ftree-parallelize-loops to 
trigger.

OK for trunk?

Thanks,
- Tom

2014-11-14  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_parallelize_loops_oacc_kernels in pass group
	pass_oacc_kernels.  Move pass_expand_omp_ssa into pass group
	pass_oacc_kernels.
	* tree-parloops.c (create_parallel_loop): Add function parameters
	region_entry and bool oacc_kernels_p.  Handle oacc_kernels_p.
	(gen_parallel_loop): Same.  Use omp_expand_local if oacc_kernels_p.
	Call create_parallel_loop with additional args.
	(parallelize_loops): Add function parameter oacc_kernels_p.  Calculate
	dominance info.  Skip loops that are not in a kernels region. Call
	gen_parallel_loop with additional args.
	(pass_parallelize_loops::execute): Call parallelize_loops with false
	argument.
	(pass_data_parallelize_loops_oacc_kernels): New pass_data.
	(class pass_parallelize_loops_oacc_kernels): New pass.
	(pass_parallelize_loops_oacc_kernels::execute)
	(make_pass_parallelize_loops_oacc_kernels): New function.
	* tree-pass.h (make_pass_parallelize_loops_oacc_kernels): Declare.

	* testsuite/libgomp.oacc-c/oacc-kernels-2-run.c: New test.
	* testsuite/libgomp.oacc-c/oacc-kernels-run.c: New test.

	* gcc.dg/oacc-kernels-2.c: New test.
	* gcc.dg/oacc-kernels.c: New test.
---
 gcc/passes.def |   3 +-
 gcc/testsuite/gcc.dg/oacc-kernels-2.c  |  79 +++
 gcc/testsuite/gcc.dg/oacc-kernels.c|  71 ++
 gcc/tree-parloops.c| 242 -
 gcc/tree-pass.h|   2 +
 .../testsuite/libgomp.oacc-c/oacc-kernels-2-run.c  |  65 ++
 .../testsuite/libgomp.oacc-c/oacc-kernels-run.c|  59 +
 7 files changed, 465 insertions(+), 56 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/oacc-kernels-2.c
 create mode 100644 gcc/testsuite/gcc.dg/oacc-kernels.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/oacc-kernels-2-run.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/oacc-kernels-run.c

diff --git a/gcc/passes.def b/gcc/passes.def
index cd9443c..cc09ba9 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -80,9 +80,10 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_ccp);
+  	  NEXT_PASS (pass_parallelize_loops_oacc_kernels);
+	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
-	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_fre);
 	  NEXT_PASS (pass_merge_phi);
 	  NEXT_PASS (pass_cd_dce);
diff --git a/gcc/testsuite/gcc.dg/oacc-kernels-2.c b/gcc/testsuite/gcc.dg/oacc-kernels-2.c
new file mode 100644
index 000..1ff4bad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/oacc-kernels-2.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target fopenacc } */
+/* { dg-options -fopenacc -ftree-parallelize-loops=32 -O2 -std=c99 -fdump-tree-parloops_oacc_kernels-all -fdump-tree-copyrename } */
+
+#include stdlib.h
+#include stdio.h
+
+#define N (1024 * 512)
+#define N_REF 4293394432
+
+#if 1
+#define COUNTERTYPE unsigned int
+#else
+#define COUNTERTYPE int
+#endif
+
+int
+main (void)
+{
+  unsigned int i;
+
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = malloc (N * sizeof (unsigned int));
+  b = malloc (N * sizeof (unsigned int));
+  c = malloc (N * sizeof (unsigned int));
+
+
+#pragma acc kernels copyout (a[0:N

[PATCH, 8/8] Do simple omp lowering for no address taken var

2014-11-15 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels 
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch lowers integer variables that do not have their address taken as
local variables.  We use a copy at region entry and exit to copy the value in and
out.


In the context of reduction handling in a kernels region, this allows the 
parloops reduction analysis to recognize the reduction, even after oacc lowering 
has been done in pass_lower_omp.


In more detail, without this patch, the omp_data_i load and stores are generated 
in place (in this case, in the loop):

...
{
  .omp_data_iD.2201 = .omp_data_arr.15D.2220;
  {
unsigned intD.9 iD.2146;

iD.2146 = 0;
goto D.2207;
D.2208:
D.2216 = .omp_data_iD.2201-cD.2203;
c.9D.2176 = *D.2216;
D.2177 = (long unsigned intD.10) iD.2146;
D.2178 = D.2177 * 4;
D.2179 = c.9D.2176 + D.2178;
D.2180 = *D.2179;
D.2217 = .omp_data_iD.2201-sumD.2205;
D.2218 = *D.2217;
D.2217 = .omp_data_iD.2201-sumD.2205;
D.2219 = D.2180 + D.2218;
*D.2217 = D.2219;
iD.2146 = iD.2146 + 1;
D.2207:
if (iD.2146 = 524287) goto D.2208; else goto D.2209;
D.2209:
  }
...

With this patch, the omp_data_i load and stores for sum are generated at entry 
and exit:

...
{
  .omp_data_iD.2201 = .omp_data_arr.15D.2218;
  D.2216 = .omp_data_iD.2201-sumD.2205;
  sumD.2206 = *D.2216;
  {
unsigned intD.9 iD.2146;

iD.2146 = 0;
goto D.2207;
D.2208:
D.2217 = .omp_data_iD.2201-cD.2203;
c.9D.2176 = *D.2217;
D.2177 = (long unsigned intD.10) iD.2146;
D.2178 = D.2177 * 4;
D.2179 = c.9D.2176 + D.2178;
D.2180 = *D.2179;
sumD.2206 = D.2180 + sumD.2206;
iD.2146 = iD.2146 + 1;
D.2207:
if (iD.2146 = 524287) goto D.2208; else goto D.2209;
D.2209:
  }
  *D.2216 = sumD.2206;
  #pragma omp return
}
...


So, without the patch the reduction operation looks like this:
...
*(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205) + x
...

And with this patch the reduction operation is simply:
...
sumD.2206 = sumD.2206 + x:
...

OK for trunk?

Thanks,
- Tom

2014-11-03  Tom de Vries  t...@codesourcery.com

	* gimple.c (gimple_seq_ior_addresses_taken_op)
	(gimple_seq_ior_addresses_taken): New function.
	* gimple.h (gimple_seq_ior_addresses_taken): Declare.
	* omp-low.c (addresses_taken): Declare local variable.
	(lower_oacc_offload): Lower variables that do not have their address
	taken as local variable.  Use a copy at region entry and exit to copy
	the value in and out.
	(execute_lower_omp): Calculate addresses_taken.
---
 gcc/gimple.c  | 35 +++
 gcc/gimple.h  |  1 +
 gcc/omp-low.c | 25 ++---
 3 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/gcc/gimple.c b/gcc/gimple.c
index a9174e6..107eb26 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2428,6 +2428,41 @@ gimple_ior_addresses_taken (bitmap addresses_taken, gimple stmt)
 	gimple_ior_addresses_taken_1);
 }
 
+/* Helper function for gimple_seq_ior_addresses_taken.  */
+
+static tree
+gimple_seq_ior_addresses_taken_op (tree *tp,
+   int *walk_subtrees ATTRIBUTE_UNUSED,
+   void *data)
+{
+  struct walk_stmt_info *wi = (struct walk_stmt_info *)data;
+  bitmap addresses_taken = (bitmap)wi-info;
+
+  tree t = *tp;
+  if (TREE_CODE (t) != ADDR_EXPR)
+return NULL_TREE;
+
+  tree var = TREE_OPERAND (t, 0);
+  if (!DECL_P (var))
+return NULL_TREE;
+
+  bitmap_set_bit (addresses_taken, DECL_UID (var));
+
+  return NULL_TREE;
+}
+
+/* Find the decls in SEQ that have their address taken, and set the
+   corresponding decl_uid

[PATCH, committed] Add -ftree-tail-merge to tail-merge testcases

2014-11-17 Thread Tom de Vries

Hi,

this patch adds -ftree-tail-merge to tail-merge testcases.

There is no separate dump for tail-merge, instead the test-cases use 
-fdump-tree-pre, so it's not easy to spot that they're tail-merge test-cases. 
This patch fixes that by adding an explicit -ftree-tail-merge.


Committed as obvious.

Thanks,
- Tom
From 70b69e3572414bd486cd9c25ed77216975136e21 Mon Sep 17 00:00:00 2001
From: Tom de Vries t...@codesourcery.com
Date: Mon, 17 Nov 2014 19:00:12 +0100
Subject: [PATCH 1/5] Add -ftree-tail-merge to tail-merge testcases

2014-11-17  Tom de Vries  t...@codesourcery.com

	* gcc.dg/pr43864-2.c: Add -ftree-tail-merge to dg-options.
	* gcc.dg/pr43864-3.c: Same.
	* gcc.dg/pr43864-4.c: Same.
	* gcc.dg/pr43864.c: Same.
	* gcc.dg/pr50763.c: Same.
	* gcc.dg/pr51879-12.c: Same.
	* gcc.dg/pr51879-16.c: Same.
	* gcc.dg/pr51879-17.c: Same.
	* gcc.dg/pr51879-18.c: Same.
	* gcc.dg/pr51879-2.c: Same.
	* gcc.dg/pr51879-3.c: Same.
	* gcc.dg/pr51879-4.c: Same.
	* gcc.dg/pr51879-6.c: Same.
	* gcc.dg/pr51879-7.c: Same.
	* gcc.dg/pr51879.c: Same.
---
 gcc/testsuite/gcc.dg/pr43864-2.c  | 2 +-
 gcc/testsuite/gcc.dg/pr43864-3.c  | 2 +-
 gcc/testsuite/gcc.dg/pr43864-4.c  | 2 +-
 gcc/testsuite/gcc.dg/pr43864.c| 2 +-
 gcc/testsuite/gcc.dg/pr50763.c| 2 +-
 gcc/testsuite/gcc.dg/pr51879-12.c | 2 +-
 gcc/testsuite/gcc.dg/pr51879-16.c | 2 +-
 gcc/testsuite/gcc.dg/pr51879-17.c | 2 +-
 gcc/testsuite/gcc.dg/pr51879-18.c | 2 +-
 gcc/testsuite/gcc.dg/pr51879-2.c  | 2 +-
 gcc/testsuite/gcc.dg/pr51879-3.c  | 2 +-
 gcc/testsuite/gcc.dg/pr51879-4.c  | 2 +-
 gcc/testsuite/gcc.dg/pr51879-6.c  | 2 +-
 gcc/testsuite/gcc.dg/pr51879-7.c  | 2 +-
 gcc/testsuite/gcc.dg/pr51879.c| 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr43864-2.c b/gcc/testsuite/gcc.dg/pr43864-2.c
index f00fff9..8ab1e1a 100644
--- a/gcc/testsuite/gcc.dg/pr43864-2.c
+++ b/gcc/testsuite/gcc.dg/pr43864-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 int
 f (int c, int b, int d)
diff --git a/gcc/testsuite/gcc.dg/pr43864-3.c b/gcc/testsuite/gcc.dg/pr43864-3.c
index c4954e1..8b72ecf 100644
--- a/gcc/testsuite/gcc.dg/pr43864-3.c
+++ b/gcc/testsuite/gcc.dg/pr43864-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 /* Commutative case.  */
 
diff --git a/gcc/testsuite/gcc.dg/pr43864-4.c b/gcc/testsuite/gcc.dg/pr43864-4.c
index 42adfee..4dbc953 100644
--- a/gcc/testsuite/gcc.dg/pr43864-4.c
+++ b/gcc/testsuite/gcc.dg/pr43864-4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 /* Different stmt order.  */
 
diff --git a/gcc/testsuite/gcc.dg/pr43864.c b/gcc/testsuite/gcc.dg/pr43864.c
index 8d1e989..a644e21 100644
--- a/gcc/testsuite/gcc.dg/pr43864.c
+++ b/gcc/testsuite/gcc.dg/pr43864.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 extern void foo (char*, int);
 extern void mysprintf (char *, char *);
diff --git a/gcc/testsuite/gcc.dg/pr50763.c b/gcc/testsuite/gcc.dg/pr50763.c
index 695b61c..8201fd3 100644
--- a/gcc/testsuite/gcc.dg/pr50763.c
+++ b/gcc/testsuite/gcc.dg/pr50763.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fno-tree-dominator-opts -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fno-tree-dominator-opts -fdump-tree-pre } */
 
 int bar (int i);
 
diff --git a/gcc/testsuite/gcc.dg/pr51879-12.c b/gcc/testsuite/gcc.dg/pr51879-12.c
index 1b25e29..8126505 100644
--- a/gcc/testsuite/gcc.dg/pr51879-12.c
+++ b/gcc/testsuite/gcc.dg/pr51879-12.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 __attribute__((pure)) int bar (int);
 __attribute__((pure)) int bar2 (int);
diff --git a/gcc/testsuite/gcc.dg/pr51879-16.c b/gcc/testsuite/gcc.dg/pr51879-16.c
index 3a84e97..7897094 100644
--- a/gcc/testsuite/gcc.dg/pr51879-16.c
+++ b/gcc/testsuite/gcc.dg/pr51879-16.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 struct S {
   int i;
diff --git a/gcc/testsuite/gcc.dg/pr51879-17.c b/gcc/testsuite/gcc.dg/pr51879-17.c
index 806fe7b..8a2fe5e 100644
--- a/gcc/testsuite/gcc.dg/pr51879-17.c
+++ b/gcc/testsuite/gcc.dg/pr51879-17.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-pre } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
 
 struct S {
   int i;
diff --git a/gcc/testsuite/gcc.dg/pr51879-18.c b/gcc/testsuite/gcc.dg/pr51879-18.c
index 95629f1..8de3557 100644
--- a/gcc/testsuite/gcc.dg/pr51879-18.c
+++ b/gcc/testsuite/gcc.dg/pr51879-18.c
@@ -1,5 +1,5 @@
 /* { dg-do compile

[PATCH, committed] Fix scan patterns for pr43864-{2,3,4}.c

2014-11-17 Thread Tom de Vries

Hi,

this patch fixes the scan patterns for test-cases pr43864-{2,3,4}.c.

The patterns could match across several lines; the patch fixes this by using
(?n), which makes the match newline-sensitive.

Committed as obvious.

Thanks,
- Tom
2014-11-17  Tom de Vries  t...@codesourcery.com

	* gcc.dg/pr43864-2.c: Fix scan-tree-dump-times scan pattern.
	* gcc.dg/pr43864-3.c: Same.
	* gcc.dg/pr43864-4.c: Same.
---
 gcc/testsuite/gcc.dg/pr43864-2.c | 2 +-
 gcc/testsuite/gcc.dg/pr43864-3.c | 2 +-
 gcc/testsuite/gcc.dg/pr43864-4.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr43864-2.c b/gcc/testsuite/gcc.dg/pr43864-2.c
index 8ab1e1a..c576dbd 100644
--- a/gcc/testsuite/gcc.dg/pr43864-2.c
+++ b/gcc/testsuite/gcc.dg/pr43864-2.c
@@ -18,6 +18,6 @@ f (int c, int b, int d)
 }
 
 /* { dg-final { scan-tree-dump-times if  0 pre} } */
-/* { dg-final { scan-tree-dump-times _.*\\\+.*_ 1 pre} } */
+/* { dg-final { scan-tree-dump-times (?n)_.*\\+.*_ 1 pre} } */
 /* { dg-final { scan-tree-dump-not Invalid sum pre} } */
 /* { dg-final { cleanup-tree-dump pre } } */
diff --git a/gcc/testsuite/gcc.dg/pr43864-3.c b/gcc/testsuite/gcc.dg/pr43864-3.c
index 8b72ecf..7956450 100644
--- a/gcc/testsuite/gcc.dg/pr43864-3.c
+++ b/gcc/testsuite/gcc.dg/pr43864-3.c
@@ -19,6 +19,6 @@ int f(int c, int b, int d)
 }
 
 /* { dg-final { scan-tree-dump-times if  0 pre} } */
-/* { dg-final { scan-tree-dump-times _.*\\\+.*_ 1 pre} } */
+/* { dg-final { scan-tree-dump-times (?n)_.*\\+.*_ 1 pre} } */
 /* { dg-final { scan-tree-dump-not Invalid sum pre} } */
 /* { dg-final { cleanup-tree-dump pre } } */
diff --git a/gcc/testsuite/gcc.dg/pr43864-4.c b/gcc/testsuite/gcc.dg/pr43864-4.c
index 4dbc953..7353b3d 100644
--- a/gcc/testsuite/gcc.dg/pr43864-4.c
+++ b/gcc/testsuite/gcc.dg/pr43864-4.c
@@ -23,7 +23,7 @@ int f(int c, int b, int d)
 }
 
 /* { dg-final { scan-tree-dump-times if  0 pre} } */
-/* { dg-final { scan-tree-dump-times _.*\\\+.*_ 1 pre} } */
-/* { dg-final { scan-tree-dump-times  -  2 pre} } */
+/* { dg-final { scan-tree-dump-times (?n)_.*\\+.*_ 1 pre} } */
+/* { dg-final { scan-tree-dump-times (?n)_.*-.*_ 2 pre} } */
 /* { dg-final { scan-tree-dump-not Invalid sum pre} } */
 /* { dg-final { cleanup-tree-dump pre } } */
-- 
1.9.1



[PATCH, PR62167] Fix tail-merge pass for dead type-unsafe code

2014-11-18 Thread Tom de Vries

Richard,

this (trunk) patch fixes PR62167.

The patch fixes a problem that triggers with the test-case on the 4.8 branch, 
when tail-merge makes a dead type-unsafe load alive.


I'm not able to reproduce this bug on 4.9 and trunk with the same test-case. On 
those branches, the tail-merge already does not happen.


The reason for the difference is as follows: With 4.8 the two phi arguments of 
the phi in the tail block are value-numbered identically:

...
SCC consists of: p_14
Value numbering p_14 stmt = p_14 = MEM[(struct head *)_13].first;
Setting value number of p_14 to p_14 (changed)

SCC consists of: p_15
Value numbering p_15 stmt = p_15 = _13-next;
Setting value number of p_15 to p_14 (changed)
...

With 4.9 (and trunk), that's not the case:
...
SCC consists of: p_14
Value numbering p_14 stmt = p_14 = MEM[(struct head *)heads][k.1_9].first;
Setting value number of p_14 to p_14 (changed)

SCC consists of: p_15
Value numbering p_15 stmt = p_15 = _13-next;
Setting value number of p_15 to p_15 (changed)
...

I'm not sure the bug triggers on trunk and 4.9, but I see no reason why it could 
not trigger, so I'd prefer to apply the patch to 4.9 and trunk as well.


The patch introduces an xfail for pr51879-12.c. I can follow up with a patch to 
improve upon that, but I think that's better limited to trunk only.


Bootstrapped and reg-tested on x86_64/trunk.

OK for trunk/stage3, 4.8, 4.9?

Thanks,
- Tom
2014-11-17  Tom de Vries  t...@codesourcery.com

	PR tree-optimization/62167
	* tree-ssa-tail-merge.c (stmt_local_def): Handle statements with vuse
	conservatively.
	(gimple_equal_p): Don't use vn_valueize to compare for lhs equality of
	assigns.

	* gcc.dg/pr51879-12.c: Add xfails.
	* gcc.dg/pr62167-run.c: New test.
	* gcc.dg/pr62167.c: New test.
---
 gcc/testsuite/gcc.dg/pr51879-12.c  |  4 +--
 gcc/testsuite/gcc.dg/pr62167-run.c | 47 +++
 gcc/testsuite/gcc.dg/pr62167.c | 50 ++
 gcc/tree-ssa-tail-merge.c  |  6 +++--
 4 files changed, 103 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr62167-run.c
 create mode 100644 gcc/testsuite/gcc.dg/pr62167.c

diff --git a/gcc/testsuite/gcc.dg/pr51879-12.c b/gcc/testsuite/gcc.dg/pr51879-12.c
index 8126505..85e2687 100644
--- a/gcc/testsuite/gcc.dg/pr51879-12.c
+++ b/gcc/testsuite/gcc.dg/pr51879-12.c
@@ -24,6 +24,6 @@ foo (int y)
   baz (a);
 }
 
-/* { dg-final { scan-tree-dump-times bar \\( 1 pre} } */
-/* { dg-final { scan-tree-dump-times bar2 \\( 1 pre} } */
+/* { dg-final { scan-tree-dump-times bar \\( 1 pre { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times bar2 \\( 1 pre { xfail *-*-* } } } */
 /* { dg-final { cleanup-tree-dump pre } } */
diff --git a/gcc/testsuite/gcc.dg/pr62167-run.c b/gcc/testsuite/gcc.dg/pr62167-run.c
new file mode 100644
index 000..37214a3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr62167-run.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options -O2 -ftree-tail-merge } */
+
+struct node
+{
+  struct node *next;
+  struct node *prev;
+};
+
+struct node node;
+
+struct head
+{
+  struct node *first;
+};
+
+struct head heads[5];
+
+int k = 2;
+
+struct head *head = heads[2];
+
+int
+main ()
+{
+  struct node *p;
+
+  node.next = (void*)0;
+
+  node.prev = (void *)head;
+
+  head-first = node;
+
+  struct node *n = head-first;
+
+  struct head *h = heads[k];
+
+  heads[2].first = n-next;
+
+  if ((void*)n-prev == (void *)h)
+p = h-first;
+  else
+/* Dead tbaa-unsafe load from ((struct node *)heads[2])-next.  */
+p = n-prev-next;
+
+  return !(p == (void*)0);
+}
diff --git a/gcc/testsuite/gcc.dg/pr62167.c b/gcc/testsuite/gcc.dg/pr62167.c
new file mode 100644
index 000..f8c31a0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr62167.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -ftree-tail-merge -fdump-tree-pre } */
+
+struct node
+{
+  struct node *next;
+  struct node *prev;
+};
+
+struct node node;
+
+struct head
+{
+  struct node *first;
+};
+
+struct head heads[5];
+
+int k = 2;
+
+struct head *head = heads[2];
+
+int
+main ()
+{
+  struct node *p;
+
+  node.next = (void*)0;
+
+  node.prev = (void *)head;
+
+  head-first = node;
+
+  struct node *n = head-first;
+
+  struct head *h = heads[k];
+
+  heads[2].first = n-next;
+
+  if ((void*)n-prev == (void *)h)
+p = h-first;
+  else
+/* Dead tbaa-unsafe load from ((struct node *)heads[2])-next.  */
+p = n-prev-next;
+
+  return !(p == (void*)0);
+}
+
+/* { dg-final { scan-tree-dump-not Removing basic block pre} } */
+/* { dg-final { cleanup-tree-dump pre } } */
diff --git a/gcc/tree-ssa-tail-merge.c b/gcc/tree-ssa-tail-merge.c
index 303bd5e..1651985 100644
--- a/gcc/tree-ssa-tail-merge.c
+++ b/gcc/tree-ssa-tail-merge.c
@@ -326,7 +326,8 @@ stmt_local_def (gimple stmt)
 
   if (gimple_vdef (stmt) != NULL_TREE
   || gimple_has_side_effects (stmt)
-  || gimple_could_trap_p_1 (stmt, false, false))
+  || gimple_could_trap_p_1

Re: openacc kernels directive -- initial support

2014-11-19 Thread Tom de Vries

On 15-11-14 13:14, Tom de Vries wrote:

  Don't allow flto-partition=balance for fopenacc
Unsubmitted. This works around a compilation problem for
libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c that I ran into on
our internal dev branch.  I'll investigate whether I can reproduce with
gomp-4_0-branch asap.


I managed to reproduce this problem with the gomp-4_0-branch. Filed as: 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63979 .


Thanks,
- Tom


[PATCH, ARM] Fix PR63718, Thumb1 bootstrap -- disable fuse-caller-save for Thumb1

2014-11-20 Thread Tom de Vries

Richard,

This patch fixes PR63718, which currently breaks Thumb1 bootstrap.

The problem is that in Thumb1 mode, we emit the epilogue in RTL, but the last 
insn - epilogue_insns - does not accurately model the corresponding insns

emitted in the asm file. F.i., the asm file may contain an insn:
...
  pop {r0}

while the corresponding RTL pattern looks like this:
...
(jump_insn (unspec_volatile [
(return)
 ] VUNSPEC_EPILOGUE))
...

As a consequence, the epilogue may clobber registers without fuse-caller-save 
being able to analyze that.


Adding the missing clobbers to epilogue_insns is not trivial, and probably not a 
good idea for stage3. The patch works around the problem by disabling 
fuse-caller-save in Thumb1 mode.


Build and reg-tested on arm-none-eabi.

OK for stage3?

Thanks,
- Tom
2014-11-20  Tom de Vries  t...@codesourcery.com

	PR rtl-optimization/63718
	* config/arm/arm.c (arm_option_override): Disable fuse-caller-save for
	Thumb1.

Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c (revision 217730)
+++ gcc/config/arm/arm.c (working copy)
@@ -3105,6 +3105,18 @@ arm_option_override (void)
(!arm_arch7 || !current_tune-prefer_ldrd_strd))
 flag_schedule_fusion = 0;
 
+  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
+ - epilogue_insns - does not accurately model the corresponding insns
+ emitted in the asm file.  In particular, see the comment in thumb_exit
+ 'Find out how many of the (return) argument registers we can corrupt'.
+ As a consequence, the epilogue may clobber registers without
+ fuse-caller-save finding out about it.  Therefore, disable fuse-caller-save
+ in Thumb1 mode.
+ TODO: Accurately model clobbers for epilogue_insns and reenable
+ fuse-caller-save.  */
+  if (TARGET_THUMB1)
+flag_use_caller_save = 0;
+
   /* Register global variables with the garbage collector.  */
   arm_add_gc_roots ();
 }


Re: [PATCH 1/2] teach mklog to get name / email from git config when available

2014-11-20 Thread Tom de Vries

On 09-05-14 16:47, Diego Novillo wrote:

I would probably use git config directly here. It would work with both
git and svn checkouts (if you have a global .git configuration). But
testing for .git is fine with me as well.

I like Peter's idea of having a ~/.mklog file to override. This would
work for both svn and git checkouts.



Diego,

this patch implements both:
- it uses the ~/.mklog file proposed by Peter
- in absence of a ~/.mklog file, it uses git config, also when not in a git
  repository

OK?

Thanks,
- Tom
2014-11-20  Tom de Vries  t...@codesourcery.com
	Peter Bergner  berg...@vnet.ibm.com

	* mklog: Handle .mklog.  Use git setting independent of presence of .git
	directory.
---
 contrib/mklog | 56 +++-
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/contrib/mklog b/contrib/mklog
index 840f6f8..abbf0af 100755
--- a/contrib/mklog
+++ b/contrib/mklog
@@ -29,32 +29,46 @@
 use File::Temp;
 use File::Copy qw(cp mv);
 
-# Change these settings to reflect your profile.
-$username = $ENV{'USER'};
-$name = `finger $username | grep -o 'Name: .*'`;
-@n = split(/: /, $name);
-$name = $n[1]; chop($name);
-$addr = $username . \@my.domain.org;
 $date = `date +%Y-%m-%d`; chop ($date);
 
+$dot_mklog_format_msg =
+The .mklog format is:\n
+. NAME = ...\n
+. EMAIL = ...\n;
+
+# Create a .mklog to reflect your profile, if necessary.
+my $conf = $ENV{HOME}/.mklog;
+if (-f $conf) {
+open (CONF, $conf)
+	or die Could not open file '$conf' for reading: $!\n;
+while (CONF) {
+	if (m/^\s*NAME\s*=\s*(.*)\s*$/)	{
+	$name = $1;
+	} elsif (m/^\s*EMAIL\s*=\s*(.*)\s*$/) {
+	$addr = $1;
+	}
+}
+if (!($name  $addr)) {
+	die Could not read .mklog settings.\n
+	. $dot_mklog_format_msg;
+}
+} else {
+$name = `git config user.name`;
+chomp($name);
+$addr = `git config user.email`;
+chomp($addr);
+
+if (!($name  $addr)) {
+	die Could not read git user.name and user.email settings.\n
+	. Please add missing git settings, or create a .mklog file in
+	.  $ENV{HOME}.\n
+	. $dot_mklog_format_msg;
+}
+}
+
 $gcc_root = $0;
 $gcc_root =~ s/[^\\\/]+$/../;
 
-# if this is a git tree then take name and email from the git configuration
-if (-d $gcc_root/.git) {
-  $gitname = `git config user.name`;
-  chomp($gitname);
-  if ($gitname) {
-	  $name = $gitname;
-  }
-
-  $gitaddr = `git config user.email`;
-  chomp($gitaddr);
-  if ($gitaddr) {
-	  $addr = $gitaddr;
-  }
-}
-
 #-
 # Program starts here. You should not need to edit anything below this
 # line.
-- 
1.9.1



Re: [PATCH 1/2] teach mklog to get name / email from git config when available

2014-11-20 Thread Tom de Vries

On 20-11-14 17:43, Segher Boessenkool wrote:

On Thu, Nov 20, 2014 at 05:22:20PM +0100, Tom de Vries wrote:

+my $conf = $ENV{HOME}/.mklog;
+if (-f $conf) {
+open (CONF, $conf)
+   or die Could not open file '$conf' for reading: $!\n;
+while (CONF) {
+   if (m/^\s*NAME\s*=\s*(.*)\s*$/) {


The final \s* never matches anything since the .* gobbles up everything.
Use .*? if you really want to get rid of the trailing whitespace.



Thanks for spotting that, patch updated.

OK for trunk?

Thanks,
- Tom



Segher



2014-11-20  Tom de Vries  t...@codesourcery.com
	Peter Bergner  berg...@vnet.ibm.com

	* mklog: Handle .mklog.  Use git setting independent of presence of .git
	directory.
---
 contrib/mklog | 56 +++-
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/contrib/mklog b/contrib/mklog
index 840f6f8..f7974a7 100755
--- a/contrib/mklog
+++ b/contrib/mklog
@@ -29,32 +29,46 @@
 use File::Temp;
 use File::Copy qw(cp mv);
 
-# Change these settings to reflect your profile.
-$username = $ENV{'USER'};
-$name = `finger $username | grep -o 'Name: .*'`;
-@n = split(/: /, $name);
-$name = $n[1]; chop($name);
-$addr = $username . \@my.domain.org;
 $date = `date +%Y-%m-%d`; chop ($date);
 
+$dot_mklog_format_msg =
+The .mklog format is:\n
+. NAME = ...\n
+. EMAIL = ...\n;
+
+# Create a .mklog to reflect your profile, if necessary.
+my $conf = $ENV{HOME}/.mklog;
+if (-f $conf) {
+open (CONF, $conf)
+	or die Could not open file '$conf' for reading: $!\n;
+while (CONF) {
+	if (m/^\s*NAME\s*=\s*(.*?)\s*$/) {
+	$name = $1;
+	} elsif (m/^\s*EMAIL\s*=\s*(.*?)\s*$/) {
+	$addr = $1;
+	}
+}
+if (!($name  $addr)) {
+	die Could not read .mklog settings.\n
+	. $dot_mklog_format_msg;
+}
+} else {
+$name = `git config user.name`;
+chomp($name);
+$addr = `git config user.email`;
+chomp($addr);
+
+if (!($name  $addr)) {
+	die Could not read git user.name and user.email settings.\n
+	. Please add missing git settings, or create a .mklog file in
+	.  $ENV{HOME}.\n
+	. $dot_mklog_format_msg;
+}
+}
+
 $gcc_root = $0;
 $gcc_root =~ s/[^\\\/]+$/../;
 
-# if this is a git tree then take name and email from the git configuration
-if (-d $gcc_root/.git) {
-  $gitname = `git config user.name`;
-  chomp($gitname);
-  if ($gitname) {
-	  $name = $gitname;
-  }
-
-  $gitaddr = `git config user.email`;
-  chomp($gitaddr);
-  if ($gitaddr) {
-	  $addr = $gitaddr;
-  }
-}
-
 #-
 # Program starts here. You should not need to edit anything below this
 # line.
-- 
1.9.1



[PATCH, committed] Add fgcse-sm test with scan-rtl-dump

2014-11-21 Thread Tom de Vries

Hi,

this patch adds a fgcse-sm test with a scan-rtl-dump directive.

The other fgcse-sm tests:
...
./gcc/testsuite/gcc.dg/pr45352-3.c
./gcc/testsuite/gcc.dg/torture/pr24257.c
./gcc/testsuite/gcc.target/i386/movsi-sm-1.c
./gcc/testsuite/g++.dg/opt/pr36185.C
...
do not check whether fgcse-sm actually does something.

Committed as trivial.

Thanks,
- Tom
2014-11-21  Tom de Vries  t...@codesourcery.com

	* gcc.dg/store-motion-fgcse-sm.c: New test.
---
 gcc/testsuite/gcc.dg/store-motion-fgcse-sm.c | 32 
 1 file changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/store-motion-fgcse-sm.c

diff --git a/gcc/testsuite/gcc.dg/store-motion-fgcse-sm.c b/gcc/testsuite/gcc.dg/store-motion-fgcse-sm.c
new file mode 100644
index 000..b331a24
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/store-motion-fgcse-sm.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options -O2 -ftree-pre -fno-tree-loop-im -fgcse-sm -fdump-rtl-store_motion } */
+
+/* tree-pre moves the *sum load out of the loop.  ftree-loop-im moves the *sum
+   store out of the loop, so we disable it, to allow fgcse-sm to do it
+   instead.  */
+
+#include stdlib.h
+
+void __attribute__((noinline))
+f (unsigned int *__restrict__ a, unsigned int *__restrict__ sum, unsigned int n)
+{
+  unsigned int i;
+  for (i = 0; i  n; ++i)
+*sum += a[i];
+}
+
+int
+main ()
+{
+  unsigned int a[] = { 1, 10, 100 };
+  unsigned sum = 1000;
+
+  f (a, sum, 3);
+  if (sum != )
+abort ();
+
+  return 0;
+}
+
+/* Check that -fgcse-sm did something for f.  */
+/* { dg-final { scan-rtl-dump STORE_MOTION of f, .* basic blocks, 1 insns deleted, 1 insns created store_motion } } */
-- 
1.9.1



Re: [PATCH, 1/8] Expand oacc kernels after pass_build_ealias

2014-11-24 Thread Tom de Vries

On 15-11-14 18:19, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch moves omp expansion of the oacc kernels directive to after
pass_build_ealias.

The rationale is that in order to use pass_parallelize_loops for analysis and
transformation of an oacc kernels region, we postpone omp expansion of that
region until the earliest point in the pass list where enough information is
available to run pass_parallelize_loops, in other words, after pass_build_ealias.

The patch postpones expansion in expand_omp, and ensures expansion by adding
pass_expand_omp_ssa:
- after pass_build_ealias, and
- after pass_all_early_optimizations for the case we're not optimizing.

In order to make sure the oacc kernels region arrives at pass_expand_omp_ssa,
the way it left expand_omp, the patch makes pass_ccp and pass_forwprop aware of
lowered omp code, to handle it conservatively.

The patch contains changes in expand_omp_target to deal with ssa-code, similar
to what is already present in expand_omp_taskreg.

Furthermore, the patch forces the .omp_data_sizes and .omp_data_kinds to not be
static for oacc kernels. It does this to get some references to .omp_data_sizes
and .omp_data_kinds in the ssa code.  Without these references, the definitions
will be removed. The reference of the variables in GIMPLE_OACC_KERNELS is not
enough to have them not removed. [ In vries/oacc-kernels, I used a BUILT_IN_USE
kludge for this purpose ].

Finally, at the end of pass_expand_omp_ssa we're left with SSA_NAMEs in the
original function of which the definition has been removed (as in moved to the
split off function). TODO_remove_unused_locals takes care of some of them, but
not the anonymous ones. So the patch iterates over all SSA_NAMEs to find these
dangling SSA_NAMEs and releases them.



Reposting with small update: I've replaced the use of the rather generic 
gimple_stmt_omp_lowering_p with the more specific gimple_stmt_omp_data_i_init_p.


Bootstrapped and reg-tested in the same way as before.


OK for trunk?

Thanks,
- Tom



2014-11-14  Tom de Vries  t...@codesourcery.com

	* function.h (struct function): Add contains_oacc_kernels field.
	* gimplify.c (gimplify_omp_workshare): Set contains_oacc_kernels.
	* omp-low.c: Include gimple-pretty-print.h.
	(release_first_vuse_in_edge_dest): New function.
	(expand_omp_target): Handle ssa-code.
	(expand_omp): Don't expand GIMPLE_OACC_KERNELS when not in ssa.
	(pass_data_expand_omp): Don't set PROP_gimple_eomp unconditionally in
	properties_provided field.
	(pass_expand_omp::execute): Set PROP_gimple_eomp in
	cfun-curr_properties only if cfun does not contain oacc kernels.
	(pass_data_expand_omp_ssa): Add TODO_remove_unused_locals to
	todo_flags_finish field.
	(pass_expand_omp_ssa::execute): Release dangling SSA_NAMEs after calling
	execute_expand_omp.
	(lower_omp_target): Add static_arrays variable, init to 1.  Don't use
	static arrays for kernels directive.  Use static_arrays variable.
	Handle case that .omp_data_kinds is not static.
	(gimple_stmt_ssa_operand_references_var_p)
	(gimple_stmt_omp_data_i_init_p): New function.
	* omp-low.h (gimple_stmt_omp_data_i_init_p): Declare.
	* passes.def: Add pass_expand_omp_ssa after pass_build_ealias.
	* tree-ssa-ccp.c: Include omp-low.h.
	(surely_varying_stmt_p, ccp_visit_stmt): Handle omp lowering code
	conservatively.
	* tree-ssa-forwprop.c: Include omp-low.h.
	(pass_forwprop::execute): Handle omp lowering code conservatively.
---
 gcc/function.h  |   3 +
 gcc/gimplify.c  |   1 +
 gcc/omp-low.c   | 196 +---
 gcc/omp-low.h   |   1 +
 gcc/passes.def  |   2 +
 gcc/tree-ssa-ccp.c  |   6 ++
 gcc/tree-ssa-forwprop.c |   4 +-
 7 files changed, 200 insertions(+), 13 deletions(-)

diff --git a/gcc/function.h b/gcc/function.h
index 3a6305c..bb48775 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -667,6 +667,9 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when the function contains oacc kernels directives.  */
+  unsigned int contains_oacc_kernels : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index ad48d51..c40f20f 100644
--- a/gcc/gimplify.c
+++ b/gcc

Re: [PATCH, 8/8] Do simple omp lowering for no address taken var

2014-11-24 Thread Tom de Vries

On 17-11-14 11:13, Richard Biener wrote:

On Sat, 15 Nov 2014, Tom de Vries wrote:


On 15-11-14 13:14, Tom de Vries wrote:

 Hi,
 
 I'm submitting a patch series with initial support for the oacc kernels
 directive.
 
 The patch series uses pass_parallelize_loops to implement parallelization of
 loops in the oacc kernels region.
 
 The patch series consists of these 8 patches:
 ...
   1  Expand oacc kernels after pass_build_ealias
   2  Add pass_oacc_kernels
   3  Add pass_ch_oacc_kernels to pass_oacc_kernels
   4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
   5  Add pass_loop_im to pass_oacc_kernels
   6  Add pass_ccp to pass_oacc_kernels
   7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
   8  Do simple omp lowering for no address taken var
 ...


This patch lowers integer variables that do not have their address taken as
local variables.  We use a copy at region entry and exit to copy the value in
and out.

In the context of reduction handling in a kernels region, this allows the
parloops reduction analysis to recognize the reduction, even after oacc
lowering has been done in pass_lower_omp.

In more detail, without this patch, the omp_data_i load and stores are
generated in place (in this case, in the loop):
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2220;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2216 = .omp_data_iD.2201-cD.2203;
 c.9D.2176 = *D.2216;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 D.2217 = .omp_data_iD.2201-sumD.2205;
 D.2218 = *D.2217;
 D.2217 = .omp_data_iD.2201-sumD.2205;
 D.2219 = D.2180 + D.2218;
 *D.2217 = D.2219;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 = 524287) goto D.2208; else goto D.2209;
 D.2209:
   }
...

With this patch, the omp_data_i load and stores for sum are generated at entry
and exit:
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2218;
   D.2216 = .omp_data_iD.2201-sumD.2205;
   sumD.2206 = *D.2216;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2217 = .omp_data_iD.2201-cD.2203;
 c.9D.2176 = *D.2217;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 sumD.2206 = D.2180 + sumD.2206;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 = 524287) goto D.2208; else goto D.2209;
 D.2209:
   }
   *D.2216 = sumD.2206;
   #pragma omp return
 }
...


So, without the patch the reduction operation looks like this:
...
 *(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205) + x
...

And with this patch the reduction operation is simply:
...
 sumD.2206 = sumD.2206 + x:
...

OK for trunk?

I presume the reason you are trying to do that here is that otherwise
it happens too late?  What you do is what loop store motion would
do.


Richard,

Thanks for the hint. I've built a reduction example:
...
void __attribute__((noinline))
f (unsigned int *__restrict__ a, unsigned int *__restrict__ sum, unsigned int n)
{
  unsigned int i;
  for (i = 0; i  n; ++i)
*sum += a[i];
}...
and observed that store motion of the *sum store is done by pass_loop_im, 
provided the *sum load is taken out of the loop by pass_pre first.


So alternatively, we could use pass_pre and pass_loop_im to achieve the same 
effect.

When trying out adding pass_pre as a part of the pass group pass_oacc_kernels, I 
found that also pass_copyprop was required to get parloops to recognize the 
reduction.


Attached patch adds the pre pass to pass group pass_oacc_kernels.

Bootstrapped and reg-tested in the same way as before.

OK for trunk?
2014-11-23  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_split_crit_edges and pass_pre to pass group
	pass_oacc_kernels.
	* tree-ssa-pre.c (pass_pre::clone): New function.
	* tree-ssa-sccvn.c (visit_use):  Handle .omp_data_i init conservatively.
	* tree-ssa-tail-merge.c (tail_merge_optimize): Don't run if omp not
	expanded yet.

	* g++.dg/init/new19.C: Replace pre with pre2.
	* g++.dg/tree-ssa/pr33615-2.C: Same.
	* gcc.dg/pr31847.c

Re: [PATCH, 8/8] Do simple omp lowering for no address taken var

2014-11-24 Thread Tom de Vries

On 24-11-14 12:28, Tom de Vries wrote:

On 17-11-14 11:13, Richard Biener wrote:

On Sat, 15 Nov 2014, Tom de Vries wrote:


On 15-11-14 13:14, Tom de Vries wrote:

 Hi,
 
 I'm submitting a patch series with initial support for the oacc kernels
 directive.
 
 The patch series uses pass_parallelize_loops to implement parallelization of
 loops in the oacc kernels region.
 
 The patch series consists of these 8 patches:
 ...
   1  Expand oacc kernels after pass_build_ealias
   2  Add pass_oacc_kernels
   3  Add pass_ch_oacc_kernels to pass_oacc_kernels
   4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
   5  Add pass_loop_im to pass_oacc_kernels
   6  Add pass_ccp to pass_oacc_kernels
   7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
   8  Do simple omp lowering for no address taken var
 ...


This patch lowers integer variables that do not have their address taken as
local variables.  We use a copy at region entry and exit to copy the value in
and out.

In the context of reduction handling in a kernels region, this allows the
parloops reduction analysis to recognize the reduction, even after oacc
lowering has been done in pass_lower_omp.

In more detail, without this patch, the omp_data_i load and stores are
generated in place (in this case, in the loop):
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2220;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2216 = .omp_data_iD.2201-cD.2203;
 c.9D.2176 = *D.2216;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 D.2217 = .omp_data_iD.2201-sumD.2205;
 D.2218 = *D.2217;
 D.2217 = .omp_data_iD.2201-sumD.2205;
 D.2219 = D.2180 + D.2218;
 *D.2217 = D.2219;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 = 524287) goto D.2208; else goto D.2209;
 D.2209:
   }
...

With this patch, the omp_data_i load and stores for sum are generated at entry
and exit:
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2218;
   D.2216 = .omp_data_iD.2201-sumD.2205;
   sumD.2206 = *D.2216;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2217 = .omp_data_iD.2201-cD.2203;
 c.9D.2176 = *D.2217;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 sumD.2206 = D.2180 + sumD.2206;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 = 524287) goto D.2208; else goto D.2209;
 D.2209:
   }
   *D.2216 = sumD.2206;
   #pragma omp return
 }
...


So, without the patch the reduction operation looks like this:
...
 *(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205) + x
...

And with this patch the reduction operation is simply:
...
 sumD.2206 = sumD.2206 + x:
...

OK for trunk?

I presume the reason you are trying to do that here is that otherwise
it happens too late?  What you do is what loop store motion would
do.


Richard,

Thanks for the hint. I've built a reduction example:
...
void __attribute__((noinline))
f (unsigned int *__restrict__ a, unsigned int *__restrict__ sum, unsigned int n)
{
   unsigned int i;
   for (i = 0; i  n; ++i)
 *sum += a[i];
}...
and observed that store motion of the *sum store is done by pass_loop_im,
provided the *sum load is taken out of the loop by pass_pre first.

So alternatively, we could use pass_pre and pass_loop_im to achieve the same
effect.

When trying out adding pass_pre as a part of the pass group pass_oacc_kernels, I
found that also pass_copyprop was required to get parloops to recognize the
reduction.



Attached patch adds pass_copyprop to pass group pass_oacc_kernels.

Bootstrapped and reg-tested in the same way as before.

OK for trunk?

Thanks,
- Tom
2014-11-23  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_copy_prop to pass group pass_oacc_kernels.
	* tree-ssa-copy.c (stmt_may_generate_copy): Handle .omp_data_i init
	conservatively.
---
 gcc/passes.def  | 1 +
 gcc/tree-ssa-copy.c | 4 
 2 files changed, 5 insertions(+)

diff --git a/gcc/passes.def b/gcc/passes.def
index 3a7b096..8c663b0 100644
--- a/gcc/passes.def
+++ b

Re: [patch] Define new std::ios_base::failure with abi_tag(cxx11)

2014-11-24 Thread Tom de Vries

On 14-11-14 13:18, Jonathan Wakely wrote:

This adds system_error support to iostreams, including the required
base class changes to std::ios_base::failure. The abi_tag is used to
make it a distinct type.  This changes the type of I/O exceptions
thrown by the library but exceptions are very rarely used with
iostreams.

Tested powerpc64-linux and x86_64-linux



I'm running into this failure with a non-bootstrap build from trunk. Could this 
be related?

...
2 incompatible symbols
0
_ZNSt8ios_base7failureB5cxx11C1ERKSsRKSt10error_code
std::ios_base::failure[abi:cxx11]::cxx11(std::string const, std::error_code 
const)
version status: incompatible
GLIBCXX_3.4
type: function
status: added


1
_ZNSt8ios_base7failureB5cxx11C2ERKSsRKSt10error_code
std::ios_base::failure[abi:cxx11]::cxx11(std::string const, std::error_code 
const)
version status: incompatible
GLIBCXX_3.4
type: function
status: added



 libstdc++-v3 check-abi Summary 

# of added symbols:  143
# of missing symbols:0
# of undesignated symbols:   2
# of incompatible symbols:   2

using: baseline_symbols.txt
FAIL: libstdc++-abi/abi_check
...

Thanks,
- Tom


patch.txt


commit 8f8279579e72423450eb3ff744d9102f7b891d8d
Author: Jonathan Wakelyjwak...@redhat.com
Date:   Thu Nov 13 19:30:15 2014 +

 Define C++11 version of std::ios_base::failure.

* config/abi/pre/gnu.ver: Add new exports.
* include/bits/ios_base.h (ios_base::failure): New definition using
abi_tag.
(io_errc, make_error_code, make_error_category, iostream_category):
Define.
* include/std/system_error (system_error): Add char* constructors.
* src/c++11/Makefile.am: Add new file.
* src/c++11/Makefile.in: Regenerate.
* src/c++11/cxx11-ios_failure.cc: New file.
* src/c++98/ios_failure.cc: Compile old definition without abi_tag.
* testsuite/27_io/ios_base/failure/cxx11.cc: New.
* testsuite/27_io/ios_base/failure/what-1.cc: Allow string returned by
ios_base::failure::what() to contain additional data.
* testsuite/27_io/ios_base/failure/what-2.cc: Likewise.
* testsuite/27_io/ios_base/failure/what-3.cc: Likewise.
* testsuite/27_io/ios_base/failure/what-big.cc: Likewise.

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index bd44bcc..78f3e77 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -1473,6 +1473,18 @@ GLIBCXX_3.4.21 {
  # std::basic_ios::operator bool() const
  _ZNKSt9basic_iosI[cw]St11char_traitsI[cw]EEcvbEv;

+# C++11 version of std::ios_base::failure
+_ZNKSt8ios_base7failureB5cxx114whatEv;
+_ZNSt8ios_base7failureB5cxx11C[12]ERKSs;
+_ZNSt8ios_base7failureB5cxx11C[12]EPKcRKSt10error_code;
+_ZNSt8ios_base7failureB5cxx11C[12]ERKSsB5cxx11;
+_ZNSt8ios_base7failureB5cxx11C[12]ERKSsB5cxx11RKSt10error_code;
+_ZNSt8ios_base7failureB5cxx11D[012]Ev;
+_ZTINSt8ios_base7failureB5cxx11E;
+_ZTSNSt8ios_base7failureB5cxx11E;
+_ZTVNSt8ios_base7failureB5cxx11E;
+_ZSt17iostream_categoryv;
+
  # std::ctype_base::blank
  _ZNSt10ctype_base5blankE;

diff --git a/libstdc++-v3/include/bits/ios_base.h 
b/libstdc++-v3/include/bits/ios_base.h
index 5e33b81..8e60059 100644
--- a/libstdc++-v3/include/bits/ios_base.h
+++ b/libstdc++-v3/include/bits/ios_base.h
@@ -40,6 +40,12 @@
  #include bits/localefwd.h
  #include bits/locale_classes.h

+#if __cplusplus  201103L
+# include stdexcept
+#else
+# include system_error
+#endif
+
  namespace std _GLIBCXX_VISIBILITY(default)
  {
  _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -186,6 +192,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_ios_seekdir_end = 1L  16
  };

+#if __cplusplus = 201103L
+  /// I/O error code
+  enum class io_errc { stream = 1 };
+
+  template  struct is_error_code_enumio_errc : public true_type { };
+
+  const error_category iostream_category() noexcept;
+
+  inline error_code
+  make_error_code(io_errc e) noexcept
+  { return error_code(static_castint(e), iostream_category()); }
+
+  inline error_condition
+  make_error_condition(io_errc e) noexcept
+  { return error_condition(static_castint(e), iostream_category()); }
+#endif
+
// 27.4.2  Class ios_base
/**
 *  @brief  The base of the I/O class hierarchy.
@@ -198,6 +221,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
class ios_base
{
+#if _GLIBCXX_USE_CXX11_ABI
+#if __cplusplus  201103L
+// Type that is layout-compatible with std::system_error
+struct system_error : std::runtime_error
+{
+  // Type that is layout-compatible with std::error_code
+  struct error_code
+  {
+   error_code() { }
+  private:
+   int _M_value;
+   const void* _M_cat;
+  } _M_code;
+};
+#endif
+#endif
public:

  /**
@@ -206,6 +245,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   *
   *  27.4.2.1.1  Class 

Re: [patch] Define new std::ios_base::failure with abi_tag(cxx11)

2014-11-24 Thread Tom de Vries

On 24-11-14 18:12, Jonathan Wakely wrote:

On 24/11/14 17:48 +0100, Tom de Vries wrote:

On 14-11-14 13:18, Jonathan Wakely wrote:

This adds system_error support to iostreams, including the required
base class changes to std::ios_base::failure. The abi_tag is used to
make it a distinct type.  This changes the type of I/O exceptions
thrown by the library but exceptions are very rarely used with
iostreams.

Tested powerpc64-linux and x86_64-linux



I'm running into this failure with a non-bootstrap build from trunk.


Is this on i686?



No, x86_64-unknown-linux-gnu.


Could this be related?


Yes, definitely.

I was aware of the failures but hadn't been able to reproduce them.
With the info below I should be able to fix it, thanks.



Great :)

Thanks,
- Tom




2 incompatible symbols
0
_ZNSt8ios_base7failureB5cxx11C1ERKSsRKSt10error_code
std::ios_base::failure[abi:cxx11]::cxx11(std::string const, std::error_code
const)
version status: incompatible
GLIBCXX_3.4
type: function
status: added


1
_ZNSt8ios_base7failureB5cxx11C2ERKSsRKSt10error_code
std::ios_base::failure[abi:cxx11]::cxx11(std::string const, std::error_code
const)
version status: incompatible
GLIBCXX_3.4
type: function
status: added




Re: [PATCH, 8/8] Do simple omp lowering for no address taken var

2014-11-24 Thread Tom de Vries

On 24-11-14 13:12, Richard Biener wrote:

On Mon, 24 Nov 2014, Tom de Vries wrote:


On 24-11-14 12:28, Tom de Vries wrote:

On 17-11-14 11:13, Richard Biener wrote:

On Sat, 15 Nov 2014, Tom de Vries wrote:


On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc

kernels

directive.

The patch series uses pass_parallelize_loops to implement

parallelization of

loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
  1  Expand oacc kernels after pass_build_ealias
  2  Add pass_oacc_kernels
  3  Add pass_ch_oacc_kernels to pass_oacc_kernels
  4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
  5  Add pass_loop_im to pass_oacc_kernels
  6  Add pass_ccp to pass_oacc_kernels
  7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
  8  Do simple omp lowering for no address taken var
...


This patch lowers integer variables that do not have their address

taken as

local variable.  We use a copy at region entry and exit to copy the

value in

and out.

In the context of reduction handling in a kernels region, this allows

the

parloops reduction analysis to recognize the reduction, even after oacc
lowering has been done in pass_lower_omp.

In more detail, without this patch, the omp_data_i load and stores are
generated in place (in this case, in the loop):
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2220;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2216 = .omp_data_iD.2201->cD.2203;
 c.9D.2176 = *D.2216;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 D.2217 = .omp_data_iD.2201->sumD.2205;
 D.2218 = *D.2217;
 D.2217 = .omp_data_iD.2201->sumD.2205;
 D.2219 = D.2180 + D.2218;
 *D.2217 = D.2219;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 <= 524287) goto D.2208; else goto

D.2209;

 D.2209:
   }
...

With this patch, the omp_data_i load and stores for sum are generated

at entry

and exit:
...
 {
   .omp_data_iD.2201 = .omp_data_arr.15D.2218;
   D.2216 = .omp_data_iD.2201->sumD.2205;
   sumD.2206 = *D.2216;
   {
 unsigned intD.9 iD.2146;

 iD.2146 = 0;
 goto D.2207;
 D.2208:
 D.2217 = .omp_data_iD.2201->cD.2203;
 c.9D.2176 = *D.2217;
 D.2177 = (long unsigned intD.10) iD.2146;
 D.2178 = D.2177 * 4;
 D.2179 = c.9D.2176 + D.2178;
 D.2180 = *D.2179;
 sumD.2206 = D.2180 + sumD.2206;
 iD.2146 = iD.2146 + 1;
 D.2207:
 if (iD.2146 <= 524287) goto D.2208; else goto

D.2209;

 D.2209:
   }
   *D.2216 = sumD.2206;
   #pragma omp return
 }
...


So, without the patch the reduction operation looks like this:
...
 *(.omp_data_iD.2201->sumD.2205) = *(.omp_data_iD.2201->sumD.2205)

+ x

...

And with this patch the reduction operation is simply:
...
 sumD.2206 = sumD.2206 + x;
...

OK for trunk?

I presume the reason you are trying to do that here is that otherwise
it happens too late?  What you do is what loop store motion would
do.


Richard,

Thanks for the hint. I've built a reduction example:
...
void __attribute__((noinline))
f (unsigned int *__restrict__ a, unsigned int *__restrict__ sum, unsigned
int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
  *sum += a[i];
}...
and observed that store motion of the *sum store is done by pass_loop_im,
provided the *sum load is taken out of the loop by pass_pre first.

So alternatively, we could use pass_pre and pass_loop_im to achieve the same
effect.

When trying out adding pass_pre as a part of the pass group
pass_oacc_kernels, I
found that also pass_copyprop was required to get parloops to recognize the
reduction.



Attached patch adds pass_copyprop to pass group pass_oacc_kernels.


Hum, you are gobbling up very many passes here.  In this case copyprop
will also perform trivial constant propagation so maybe it's enough
to replace ccp by copyprop.  Or go the full way and add a FRE pass.



Yep, replacing ccp by copyprop seems to work well enough.

I'll repost once bootstrap and reg-test are done.

Thanks,
- Tom



[PATCH] Add verify_sese

2014-11-24 Thread Tom de Vries

Richard,

I ran into a problem with my oacc kernels directive patch series where 
tail-merge added another entry into a region that was previously 
single-entry-single-exit.


That resulted in hitting this assert in calc_dfs_tree:
...
  /* This aborts e.g. when there is _no_ path from ENTRY to EXIT at all.  */
  gcc_assert (di->nodes == (unsigned int) n_basic_blocks_for_fn (cfun) - 1);
...
during a call to move_sese_region_to_fn.

This patch makes sure that we abort earlier, with a clearer message of what is 
actually wrong.


Bootstrapped and reg-tested on x86_64.

OK for trunk/stage3?

Thanks,
- Tom
2014-11-23  Tom de Vries  t...@codesourcery.com

	* tree-cfg.c (verify_sese): New function.
	(move_sese_region_to_fn): Call verify_sese.
	* tree-cfg.h (verify_sese): Declare.
---
 gcc/tree-cfg.c | 55 +++
 gcc/tree-cfg.h |  1 +
 2 files changed, 56 insertions(+)

diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index e78554f..db9f6c2 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -6870,6 +6870,58 @@ fixup_loop_arrays_after_move (struct function *fn1, struct function *fn2,
 fixup_loop_arrays_after_move (fn1, fn2, loop);
 }
 
+DEBUG_FUNCTION void
+verify_sese (basic_block entry, basic_block exit, vecbasic_block *bbs_p)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+  bitmap bbs = BITMAP_ALLOC (NULL);
+  int i;
+
+  gcc_assert (entry != NULL);
+  gcc_assert (entry != exit);
+  gcc_assert (bbs_p != NULL);
+
+  gcc_assert (bbs_p-length ()  0);
+
+  FOR_EACH_VEC_ELT (*bbs_p, i, bb)
+bitmap_set_bit (bbs, bb-index);
+
+  gcc_assert (bitmap_bit_p (bbs, entry-index));
+  gcc_assert (exit == NULL || bitmap_bit_p (bbs, exit-index));
+
+  FOR_EACH_VEC_ELT (*bbs_p, i, bb)
+{
+  if (bb == entry)
+	{
+	  gcc_assert (single_pred_p (entry));
+	  gcc_assert (!bitmap_bit_p (bbs, single_pred (entry)-index));
+	}
+  else
+	for (ei = ei_start (bb-preds); !ei_end_p (ei); ei_next (ei))
+	  {
+	e = ei_edge (ei);
+	gcc_assert (bitmap_bit_p (bbs, e-src-index));
+	  }
+
+  if (bb == exit)
+	{
+	  gcc_assert (single_succ_p (exit));
+	  gcc_assert (!bitmap_bit_p (bbs, single_succ (exit)-index));
+	}
+  else
+	for (ei = ei_start (bb-succs); !ei_end_p (ei); ei_next (ei))
+	  {
+	e = ei_edge (ei);
+	gcc_assert (bitmap_bit_p (bbs, e-dest-index));
+	  }
+}
+
+  BITMAP_FREE (bbs);
+}
+
+
 /* Move a single-entry, single-exit region delimited by ENTRY_BB and
EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
single basic block in the original CFG and the new basic block is
@@ -6918,6 +6970,9 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
   bbs.create (0);
   bbs.safe_push (entry_bb);
   gather_blocks_in_sese_region (entry_bb, exit_bb, bbs);
+#ifdef ENABLE_CHECKING
+  verify_sese (entry_bb, exit_bb, bbs);
+#endif
 
   /* The blocks that used to be dominated by something in BBS will now be
  dominated by the new block.  */
diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
index 626e973..d35e5ba 100644
--- a/gcc/tree-cfg.h
+++ b/gcc/tree-cfg.h
@@ -73,6 +73,7 @@ extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
   basic_block *);
 extern void gather_blocks_in_sese_region (basic_block entry, basic_block exit,
 	  vecbasic_block *bbs_p);
+extern void verify_sese (basic_block, basic_block, vecbasic_block *);
 extern basic_block move_sese_region_to_fn (struct function *, basic_block,
    basic_block, tree);
 extern void dump_function_to_file (tree, FILE *, int);
-- 
1.9.1



Re: [PATCH, 1/8] Expand oacc kernels after pass_build_ealias

2014-11-25 Thread Tom de Vries

On 24-11-14 11:56, Tom de Vries wrote:

On 15-11-14 18:19, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch moves omp expansion of the oacc kernels directive to after
pass_build_ealias.

The rationale is that in order to use pass_parallelize_loops for analysis and
transformation of an oacc kernels region, we postpone omp expansion of that
region until the earliest point in the pass list where enough information is
available to run pass_parallelize_loops, in other words, after pass_build_ealias.

The patch postpones expansion in expand_omp, and ensures expansion by adding
pass_expand_omp_ssa:
- after pass_build_ealias, and
- after pass_all_early_optimizations for the case we're not optimizing.

In order to make sure the oacc kernels region arrives at pass_expand_omp_ssa,
the way it left expand_omp, the patch makes pass_ccp and pass_forwprop aware of
lowered omp code, to handle it conservatively.

The patch contains changes in expand_omp_target to deal with ssa-code, similar
to what is already present in expand_omp_taskreg.

Furthermore, the patch forces the .omp_data_sizes and .omp_data_kinds to not be
static for oacc kernels. It does this to get some references to .omp_data_sizes
and .omp_data_kinds in the ssa code.  Without these references, the definitions
will be removed. The reference of the variables in GIMPLE_OACC_KERNELS is not
enough to have them not removed. [ In vries/oacc-kernels, I used a BUILT_IN_USE
kludge for this purpose ].

Finally, at the end of pass_expand_omp_ssa we're left with SSA_NAMEs in the
original function of which the definition has been removed (as in moved to the
split off function). TODO_remove_unused_locals takes care of some of them, but
not the anonymous ones. So the patch iterates over all SSA_NAMEs to find these
dangling SSA_NAMEs and releases them.



Reposting with small update: I've replaced the use of the rather generic
gimple_stmt_omp_lowering_p with the more specific gimple_stmt_omp_data_i_init_p.

Bootstrapped and reg-tested in the same way as before.



I've moved pass_expand_omp_ssa one down in the pass list, past pass_fre.

This allows fre to unify references to the same omp variable before entering 
pass_oacc_kernels, which helps pass_lim in pass_oacc_kernels.


F.i. this reduction fragment:
...
  # VUSE .MEM_8
  # PT = { D.2282 }
  _67 = .omp_data_i_59-sumD.2270;
  # VUSE .MEM_8
  _68 = *_67;

  _70 = _66 + _68;

  # VUSE .MEM_8
  # PT = { D.2282 }
  _69 = .omp_data_i_59-sumD.2270;
  # .MEM_71 = VDEF .MEM_8
  *_69 = _70;
...

is transformed by fre into:
...
  # VUSE .MEM_8
  # PT = { D.2282 }
  _67 = .omp_data_i_59-sumD.2270;
  # VUSE .MEM_8
  _68 = *_67;

  _70 = _66 + _68;

  # .MEM_71 = VDEF .MEM_8
  *_67 = _70;
...

In order for pass_fre to respect the kernels region boundaries, I've added a 
change in tree-ssa-sccvn.c:visit_use to handle the .omp_data_i init conservatively.


Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
- Tom

[PATCH 1/7] Expand oacc kernels after pass_fre

2014-11-25  Tom de Vries  t...@codesourcery.com

	* function.h (struct function): Add contains_oacc_kernels field.
	* gimplify.c (gimplify_omp_workshare): Set contains_oacc_kernels.
	* omp-low.c: Include gimple-pretty-print.h.
	(release_first_vuse_in_edge_dest): New function.
	(expand_omp_target): Handle ssa-code.
	(expand_omp): Don't expand GIMPLE_OACC_KERNELS when not in ssa.
	(pass_data_expand_omp): Don't set PROP_gimple_eomp unconditionally in
	properties_provided field.
	(pass_expand_omp::execute): Set PROP_gimple_eomp in
	cfun->curr_properties only if cfun does not contain oacc kernels.
	(pass_data_expand_omp_ssa): Add TODO_remove_unused_locals to
	todo_flags_finish field.
	(pass_expand_omp_ssa::execute): Release dangling SSA_NAMEs after calling
	execute_expand_omp.
	(lower_omp_target): Add static_arrays variable, init to 1.  Don't use
	static arrays for kernels directive.  Use static_arrays variable.
	Handle case that .omp_data_kinds is not static.
	(gimple_stmt_ssa_operand_references_var_p)
	(gimple_stmt_omp_data_i_init_p): New function.
	* omp-low.h (gimple_stmt_omp_data_i_init_p): Declare.
	* passes.def: Add pass_expand_omp_ssa after pass_fre.  Add
	pass_expand_omp_ssa after pass_all_early_optimizations.
	* tree-ssa-ccp.c: Include omp-low.h.
	(surely_varying_stmt_p, ccp_visit_stmt): Handle

Re: [PATCH, 2/8] Add pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:20, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds a pass group pass_oacc_kernels.

The rationale is that we want a pass group to run oacc kernels region related
(optimization) passes in.



Updated for moving pass_oacc_kernels down past pass_fre in the pass list.

Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
  - Tom

[PATCH 2/7] Add pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass group pass_oacc_kernels.
	* tree-pass.h (make_pass_oacc_kernels): Declare.
	* tree-ssa-loop.c (gate_oacc_kernels): New static function.
	(pass_data_oacc_kernels): New pass_data.
	(class pass_oacc_kernels): New pass.
	(make_pass_oacc_kernels): New function.
---
 gcc/passes.def  |  7 ++-
 gcc/tree-pass.h |  1 +
 gcc/tree-ssa-loop.c | 48 
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index bf1cd34..efb3d8c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,7 +86,12 @@ along with GCC; see the file COPYING3.  If not see
 	 execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
 	  NEXT_PASS (pass_fre);
-	  NEXT_PASS (pass_expand_omp_ssa);
+	  /* Pass group that runs when there are oacc kernels in the
+	 function.  */
+	  NEXT_PASS (pass_oacc_kernels);
+	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	  NEXT_PASS (pass_expand_omp_ssa);
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_merge_phi);
 	  NEXT_PASS (pass_cd_dce);
 	  NEXT_PASS (pass_early_ipa_sra);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 75f8aa5..d63ab2b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -449,6 +449,7 @@ extern gimple_opt_pass *make_pass_strength_reduction (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_vtable_verify (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_ubsan (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_sanopt (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt);
 
 /* IPA Passes */
 extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 758b5fc..c29aa22 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -157,6 +157,54 @@ make_pass_tree_loop (gcc::context *ctxt)
   return new pass_tree_loop (ctxt);
 }
 
+/* Gate for oacc kernels pass group.  */
+
+static bool
+gate_oacc_kernels (function *fn)
+{
+  if (!flag_openacc)
+return false;
+
+  return fn-contains_oacc_kernels;
+}
+
+/* The oacc kernels superpass.  */
+
+namespace {
+
+const pass_data pass_data_oacc_kernels =
+{
+  GIMPLE_PASS, /* type */
+  oacc_kernels, /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_LOOP, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_oacc_kernels : public gimple_opt_pass
+{
+public:
+  pass_oacc_kernels (gcc::context *ctxt)
+: gimple_opt_pass (pass_data_oacc_kernels, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *fn) { return gate_oacc_kernels (fn); }
+
+}; // class pass_oacc_kernels
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_oacc_kernels (gcc::context *ctxt)
+{
+  return new pass_oacc_kernels (ctxt);
+}
+
 /* The no-loop superpass.  */
 
 namespace {
-- 
1.9.1



Re: [PATCH, 3/8] Add pass_ch_oacc_kernels to pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:21, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds a pass_ch_oacc_kernels to the pass group pass_oacc_kernels.

The idea is that pass_parallelize_loops only deals with loops for which the
header has been copied, so the easiest way to meet that requirement when running
pass_parallelize_loops in group pass_oacc_kernels, is to run pass_ch as a part
of pass_oacc_kernels.

We define a separate pass pass_ch_oacc_kernels, to leave all loops that aren't
part of a kernels region alone.



Updated for moving pass_oacc_kernels down past pass_fre in the pass list.

Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
  - Tom
[PATCH 3/7] Add pass_ch_oacc_kernels to pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* omp-low.c (loop_in_oacc_kernels_region_p): New function.
	* omp-low.h (loop_in_oacc_kernels_region_p): Declare.
	* passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
	* tree-pass.h (make_pass_ch_oacc_kernels): Declare
	* tree-ssa-loop-ch.c: Include omp-low.h.
	(pass_ch_execute): Declare.
	(pass_ch::execute): Factor out ...
	(pass_ch_execute): ... this new function.  If handling oacc kernels,
	skip loops that are not in oacc kernels region.
	(pass_ch_oacc_kernels::execute):
	(pass_data_ch_oacc_kernels): New pass_data.
	(class pass_ch_oacc_kernels): New pass.
	(pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New
	function.
---
 gcc/omp-low.c  | 83 ++
 gcc/omp-low.h  |  2 ++
 gcc/passes.def |  1 +
 gcc/tree-pass.h|  1 +
 gcc/tree-ssa-loop-ch.c | 59 +--
 5 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 3ac546c..543dd48 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13912,4 +13912,87 @@ gimple_stmt_omp_data_i_init_p (gimple stmt)
 		   SSA_OP_DEF);
 }
 
+/* Return true if LOOP is inside a kernels region.  */
+
+bool
+loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
+			   basic_block *region_exit)
+{
+  bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap region_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap_clear (region_bitmap);
+
+  if (region_entry != NULL)
+*region_entry = NULL;
+  if (region_exit != NULL)
+*region_exit = NULL;
+
+  basic_block bb;
+  gimple last;
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  if (bitmap_bit_p (region_bitmap, bb-index))
+	continue;
+
+  last = last_stmt (bb);
+  if (!last)
+	continue;
+
+  if (gimple_code (last) != GIMPLE_OACC_KERNELS)
+	continue;
+
+  bitmap_clear (excludes_bitmap);
+  bitmap_set_bit (excludes_bitmap, bb-index);
+
+  vecbasic_block dominated
+	= get_all_dominated_blocks (CDI_DOMINATORS, bb);
+
+  unsigned di;
+  basic_block dom;
+
+  basic_block end_region = NULL;
+  FOR_EACH_VEC_ELT (dominated, di, dom)
+	{
+	  if (dom == bb)
+	continue;
+
+	  last = last_stmt (dom);
+	  if (!last)
+	continue;
+
+	  if (gimple_code (last) != GIMPLE_OMP_RETURN)
+	continue;
+
+	  if (end_region == NULL
+	  || dominated_by_p (CDI_DOMINATORS, end_region, dom))
+	end_region = dom;
+	}
+
+  vecbasic_block excludes
+	= get_all_dominated_blocks (CDI_DOMINATORS, end_region);
+
+  unsigned di2;
+  basic_block exclude;
+
+  FOR_EACH_VEC_ELT (excludes, di2, exclude)
+	if (exclude != end_region)
+	  bitmap_set_bit (excludes_bitmap, exclude-index);
+
+  FOR_EACH_VEC_ELT (dominated, di, dom)
+	if (!bitmap_bit_p (excludes_bitmap, dom-index))
+	  bitmap_set_bit (region_bitmap, dom-index);
+
+  if (bitmap_bit_p (region_bitmap, loop-header-index))
+	{
+	  if (region_entry != NULL)
+	*region_entry = bb;
+	  if (region_exit != NULL)
+	*region_exit = end_region;
+	  return true;
+	}
+}
+
+  return false;
+}
+
 #include gt-omp-low.h
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index 32076e4..30df867 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -29,6 +29,8 @@ extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
 extern bool gimple_stmt_omp_data_i_init_p (gimple);
+extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block

Re: [PATCH, 4/8] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:21, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_tree_loop_init and pass_tree_loop_done to
pass_oacc_kernels.

Pass_parallelize_loops is run between these passes in the pass group
pass_tree_loop, since it requires loop information.  We do the same for
pass_oacc_kernels.



Updated for moving pass_oacc_kernels down past pass_fre in the pass list.

Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
  - Tom
[PATCH 4/7] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* passes.def: Run pass_tree_loop_init and pass_tree_loop_done in pass
	group pass_oacc_kernels.
	* tree-ssa-loop.c (pass_tree_loop_init::clone)
	(pass_tree_loop_done::clone): New function.
---
 gcc/passes.def  | 2 ++
 gcc/tree-ssa-loop.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/gcc/passes.def b/gcc/passes.def
index 01368bb..37e08a8 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -91,7 +91,9 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	  NEXT_PASS (pass_ch_oacc_kernels);
+	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_expand_omp_ssa);
+	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_merge_phi);
 	  NEXT_PASS (pass_cd_dce);
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index c29aa22..c78b013 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -269,6 +269,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *);
+  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
 
 }; // class pass_tree_loop_init
 
@@ -563,6 +564,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
 
 }; // class pass_tree_loop_done
 
-- 
1.9.1



Re: [PATCH, 5/8] Add pass_loop_im to pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:22, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_loop_im to pass group pass_oacc_kernels.

We need this pass to simplify the loop body, and allow pass_parloops to detect
that loop iterations are independent.



Updated for moving pass_oacc_kernels down past pass_fre in the pass list.

Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
  - Tom
[PATCH 5/7] Add pass_loop_im to pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_lim in pass group pass_oacc_kernels.

	* c-c++-common/restrict-2.c: Update for new pass_lim.
	* c-c++-common/restrict-4.c: Same.
	* g++.dg/tree-ssa/pr33615.C:  Same.
	* g++.dg/tree-ssa/restrict1.C: Same.
	* gcc.dg/tm/pub-safety-1.c:  Same.
	* gcc.dg/tm/reg-promotion.c:  Same.
	* gcc.dg/tree-ssa/20050314-1.c:  Same.
	* gcc.dg/tree-ssa/loop-32.c: Same.
	* gcc.dg/tree-ssa/loop-33.c: Same.
	* gcc.dg/tree-ssa/loop-34.c: Same.
	* gcc.dg/tree-ssa/loop-35.c: Same.
	* gcc.dg/tree-ssa/loop-7.c: Same.
	* gcc.dg/tree-ssa/pr23109.c: Same.
	* gcc.dg/tree-ssa/restrict-3.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-1.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-10.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-11.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-12.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-2.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-3.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-6.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-7.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-8.c: Same.
	* gcc.dg/tree-ssa/ssa-lim-9.c: Same.
	* gcc.dg/tree-ssa/structopt-1.c: Same.
	* gfortran.dg/pr32921.f: Same.
---
 gcc/passes.def  | 1 +
 gcc/testsuite/c-c++-common/restrict-2.c | 6 +++---
 gcc/testsuite/c-c++-common/restrict-4.c | 6 +++---
 gcc/testsuite/g++.dg/tree-ssa/pr33615.C | 6 +++---
 gcc/testsuite/g++.dg/tree-ssa/restrict1.C   | 6 +++---
 gcc/testsuite/gcc.dg/tm/pub-safety-1.c  | 6 +++---
 gcc/testsuite/gcc.dg/tm/reg-promotion.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-32.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-33.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-34.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/loop-35.c | 8 
 gcc/testsuite/gcc.dg/tree-ssa/loop-7.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/pr23109.c | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-3.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-1.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-10.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-11.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-12.c  | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-2.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-3.c   | 8 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-6.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-7.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-8.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-lim-9.c   | 6 +++---
 gcc/testsuite/gcc.dg/tree-ssa/structopt-1.c | 6 +++---
 gcc/testsuite/gfortran.dg/pr32921.f | 6 +++---
 27 files changed, 81 insertions(+), 80 deletions(-)

diff --git a/gcc/passes.def b/gcc/passes.def
index 37e08a8..438d292 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -92,6 +92,7 @@ along with GCC; see the file COPYING3.  If not see
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	  NEXT_PASS (pass_ch_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
diff --git a/gcc/testsuite/c-c++-common/restrict-2.c b/gcc/testsuite/c-c++-common/restrict-2.c
index 3f71b77..f0b0e15a 100644
--- a/gcc/testsuite/c-c++-common/restrict-2.c
+++ b/gcc/testsuite/c-c++-common/restrict-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options -O -fno-strict-aliasing -fdump-tree-lim1-details } */
+/* { dg-options -O -fno-strict-aliasing -fdump-tree-lim2-details } */
 
 void foo (float * __restrict__ a, float * __restrict__ b, int n, int j)
 {
@@ -10,5 +10,5 @@ void foo (float * __restrict__ a, float * __restrict__ b, int n, int j)
 
 /* We should move the RHS of the store out of the loop.  */
 
-/* { dg-final { scan-tree-dump-times Moving statement 11 lim1 } } */
-/* { dg-final { cleanup-tree-dump lim1

Re: [PATCH, 6/8] Add pass_ccp to pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:22, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds pass_ccp to pass group pass_oacc_kernels.

We need this pass to simplify the loop body, and allow pass_parloops to detect
that loop iterations are independent.



As suggested here ( https://gcc.gnu.org/ml/gcc-patches/2014-11/msg02993.html ) 
I've replaced the pass_ccp with pass_copyprop, which performs trivial constant 
propagation in addition to copy propagation.


Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
- Tom

[PATCH 6/7] Add pass_copy_prop in pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_copy_prop to pass group pass_oacc_kernels.
	* tree-ssa-copy.c (stmt_may_generate_copy): Handle .omp_data_i init
	conservatively.
---
 gcc/passes.def  | 1 +
 gcc/tree-ssa-copy.c | 4 
 2 files changed, 5 insertions(+)

diff --git a/gcc/passes.def b/gcc/passes.def
index 438d292..fb0d331 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -93,6 +93,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_ch_oacc_kernels);
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
+	  NEXT_PASS (pass_copy_prop);
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
diff --git a/gcc/tree-ssa-copy.c b/gcc/tree-ssa-copy.c
index 7c22c5e..d6eb7a7 100644
--- a/gcc/tree-ssa-copy.c
+++ b/gcc/tree-ssa-copy.c
@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "tree-ssa-dom.h"
 #include "tree-ssa-loop-niter.h"
+#include "omp-low.h"
 
 
 /* This file implements the copy propagation pass and provides a
@@ -110,6 +111,9 @@ stmt_may_generate_copy (gimple stmt)
   if (gimple_has_volatile_ops (stmt))
 return false;
 
+  if (gimple_stmt_omp_data_i_init_p (stmt))
+return false;
+
   /* Statements with loads and/or stores will never generate a useful copy.  */
   if (gimple_vuse (stmt))
 return false;
-- 
1.9.1



Re: [PATCH, 7/8] Add pass_parloops_oacc_kernels to pass_oacc_kernels

2014-11-25 Thread Tom de Vries

On 15-11-14 18:23, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

Hi,

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
 1  Expand oacc kernels after pass_build_ealias
 2  Add pass_oacc_kernels
 3  Add pass_ch_oacc_kernels to pass_oacc_kernels
 4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
 5  Add pass_loop_im to pass_oacc_kernels
 6  Add pass_ccp to pass_oacc_kernels
 7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
 8  Do simple omp lowering for no address taken var
...


This patch adds:
- a specialized version of pass_parallelize_loops called
 pass_parloops_oacc_kernels to pass group pass_oacc_kernels, and
- relevant test-cases.

The pass only handles loops that are in a kernels region, and skips over bits of
pass_parallelize_loops that are already done for oacc kernels.

The pass reintroduces the use of omp_expand_local, I haven't managed to make it
work yet using the external pass pass_expand_omp_ssa.

An obvious limitation of the patch is the fact that we copy over the clauses
from the kernels directive to the generated parallel directive. We'll need to do
something more intelligent here, f.i. setting vector_length based on the
parallelization factor.

Another limitation is that the pass still needs -ftree-parallelize-loops to
trigger.



Updated for using pass_copyprop instead of pass_ccp in pass_oacc_kernels.

Bootstrapped and reg-tested as before.

OK for trunk?

Thanks,
- Tom

[PATCH 7/7] Add pass_parloops_oacc_kernels to pass_oacc_kernels

2014-11-25  Tom de Vries  t...@codesourcery.com

	* passes.def: Add pass_parallelize_loops_oacc_kernels in pass group
	pass_oacc_kernels.  Move pass_expand_omp_ssa into pass group
	pass_oacc_kernels.
	* tree-parloops.c (create_parallel_loop): Add function parameters
	region_entry and bool oacc_kernels_p.  Handle oacc_kernels_p.
	(gen_parallel_loop): Same.  Use omp_expand_local if oacc_kernels_p.
	Call create_parallel_loop with additional args.
	(parallelize_loops): Add function parameter oacc_kernels_p.  Calculate
	dominance info.  Skip loops that are not in a kernels region. Call
	gen_parallel_loop with additional args.
	(pass_parallelize_loops::execute): Call parallelize_loops with false
	argument.
	(pass_data_parallelize_loops_oacc_kernels): New pass_data.
	(class pass_parallelize_loops_oacc_kernels): New pass.
	(pass_parallelize_loops_oacc_kernels::execute)
	(make_pass_parallelize_loops_oacc_kernels): New function.
	* tree-pass.h (make_pass_parallelize_loops_oacc_kernels): Declare.

	* testsuite/libgomp.oacc-c/oacc-kernels-2-run.c: New test.
	* testsuite/libgomp.oacc-c/oacc-kernels-run.c: New test.

	* gcc.dg/oacc-kernels-2.c: New test.
	* gcc.dg/oacc-kernels.c: New test.
---
 gcc/passes.def |   1 +
 gcc/testsuite/gcc.dg/oacc-kernels-2.c  |  79 +++
 gcc/testsuite/gcc.dg/oacc-kernels.c|  71 ++
 gcc/tree-parloops.c| 242 -
 gcc/tree-pass.h|   2 +
 .../testsuite/libgomp.oacc-c/oacc-kernels-2-run.c  |  65 ++
 .../testsuite/libgomp.oacc-c/oacc-kernels-run.c|  59 +
 7 files changed, 464 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/oacc-kernels-2.c
 create mode 100644 gcc/testsuite/gcc.dg/oacc-kernels.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/oacc-kernels-2-run.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c/oacc-kernels-run.c

diff --git a/gcc/passes.def b/gcc/passes.def
index fb0d331..d91283b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_tree_loop_init);
 	  NEXT_PASS (pass_lim);
 	  NEXT_PASS (pass_copy_prop);
+  	  NEXT_PASS (pass_parallelize_loops_oacc_kernels);
 	  NEXT_PASS (pass_expand_omp_ssa);
 	  NEXT_PASS (pass_tree_loop_done);
 	  POP_INSERT_PASSES ()
diff --git a/gcc/testsuite/gcc.dg/oacc-kernels-2.c b/gcc/testsuite/gcc.dg/oacc-kernels-2.c
new file mode 100644
index 000..1ff4bad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/oacc-kernels-2.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target fopenacc } */
+/* { dg-options "-fopenacc -ftree-parallelize-loops=32 -O2 -std=c99 -fdump-tree-parloops_oacc_kernels-all -fdump-tree-copyrename" } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define N (1024 * 512)
+#define N_REF 4293394432
+
+#if 1
+#define COUNTERTYPE unsigned int
+#else
+#define COUNTERTYPE int
+#endif
+
+int
+main (void)
+{
+  unsigned int i;
+
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+
+  a = malloc (N * sizeof (unsigned int));
+  b = malloc (N * sizeof (unsigned int));
+  c

Re: [PATCH] gcc parallel make check

2014-11-25 Thread Tom de Vries

On 15-09-14 18:05, Jakub Jelinek wrote:

libstdc++-v3/
* testsuite/Makefile.am (check_p_numbers0, check_p_numbers1,
check_p_numbers2, check_p_numbers3, check_p_numbers4,
check_p_numbers5, check_p_numbers6, check_p_numbers,
check_p_subdirs): New variables.
(check_DEJAGNU_normal_targets): Use check_p_subdirs.
(check-DEJAGNU): Rewritten so that for parallelized
testing each job runs all the *.exp files, with
GCC_RUNTEST_PARALLELIZE_DIR set in environment.
* testsuite/Makefile.in: Regenerated.
* testsuite/lib/libstdc++.exp (gcc_parallel_test_run_p,
gcc_parallel_test_enable): New procedures.  If
GCC_RUNTEST_PARALLELIZE_DIR is set in environment, override
runtest_file_p to invoke also gcc_parallel_test_run_p.
* testsuite/libstdc++-abi/abi.exp: Run all the tests serially
by the first parallel runtest encountering it.  Fix up path
of the extract_symvers script.
* testsuite/libstdc++-xmethods/xmethods.exp: Run all the tests
serially by the first parallel runtest encountering it.  Run
dg-finish even in case of error.


When comparing test results of patch builds with test results of reference 
builds, the only differences I'm seeing are random differences in amount of 
'UNSUPPORTED: prettyprinter.exp'.


This patch fixes that by ensuring that we print that unsupported message only 
once.

The resulting test result comparison diff is:
...
--- without/FAIL  2014-11-24 17:46:32.202673282 +0100
+++ with/FAIL 2014-11-25 13:45:15.636131571 +0100
 libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: prettyprinters.exp
-libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: prettyprinters.exp
-libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: prettyprinters.exp
-libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: prettyprinters.exp
-libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: prettyprinters.exp
 libstdc++-v3/testsuite/libstdc++.sum:UNSUPPORTED: xmethods.exp
...

Furthermore, the patch adds a dg-finish in case the prettyprinters.exp file is 
unsupported, which AFAIU is also required in that case.


Bootstrapped and reg-tested on x86_64.

OK for trunk/stage3?

Thanks,
- Tom


2014-11-25  Tom de Vries  t...@codesourcery.com

	* testsuite/libstdc++-prettyprinters/prettyprinters.exp: Add missing
	dg-finish.  Only print unsupported message once.
---
 libstdc++-v3/testsuite/libstdc++-prettyprinters/prettyprinters.exp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/prettyprinters.exp b/libstdc++-v3/testsuite/libstdc++-prettyprinters/prettyprinters.exp
index a57660f..e5be5b5 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/prettyprinters.exp
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/prettyprinters.exp
@@ -30,7 +30,14 @@ if ![info exists ::env(GUALITY_GDB_NAME)] {
 }
 
 if {! [gdb_version_check]} {
+dg-finish
+# Only print unsupported message in one instance.
+if ![gcc_parallel_test_run_p prettyprinters] {
+	return
+}
+gcc_parallel_test_enable 0
 unsupported prettyprinters.exp
+gcc_parallel_test_enable 1
 return
 }
 
-- 
1.9.1



Re: [PATCH] Add verify_sese

2014-11-25 Thread Tom de Vries

On 25-11-14 10:28, Richard Biener wrote:

On Tue, Nov 25, 2014 at 1:01 AM, Tom de Vries tom_devr...@mentor.com wrote:

Richard,

I ran into a problem with my oacc kernels directive patch series where
tail-merge added another entry into a region that was previously
single-entry-single-exit.

That resulted in hitting this assert in calc_dfs_tree:
...
   /* This aborts e.g. when there is _no_ path from ENTRY to EXIT at all.  */
   gcc_assert (di->nodes == (unsigned int) n_basic_blocks_for_fn (cfun) - 1);
...
during a call to move_sese_region_to_fn.

This patch makes sure that we abort earlier, with a clearer message of what
is actually wrong.

Bootstrapped and reg-tested on x86_64.

OK for trunk/stage3?


I believe someone made the function work for SEME regions and I believe
it is actually used to copy loops with multiple exits


This is the first part of the function comment for move_sese_region_to_fn:
...
/* Move a single-entry, single-exit region delimited by ENTRY_BB and
   EXIT_BB to function DEST_CFUN.  The whole region is replaced by a
   single basic block in the original CFG and the new basic block is
   returned.  DEST_CFUN must not have a CFG yet.

   Note that the region need not be a pure SESE region.  Blocks inside
   the region may contain calls to abort/exit.  The only restriction
   is that ENTRY_BB should be the only entry point and it must
   dominate EXIT_BB.
...

I'm guessing you're referring to the 'not pure SESE region' bit?

So in fact, it's not a single-entry-single-exit region, but more a 
single-entry-at-most-one-continuation region. [ Note that in case of f.i. an 
eternal loop, we can also have single entry, no continuation. ]



so I don't see how the
patch can work in these cases?



The bbs with calls to abort/exit don't have any successor edges. verify_sese 
doesn't assert anything specific about such bbs.


Thanks,
- Tom



[PING] Fix gcc_assert in expand_omp_for_static_chunk

2014-11-26 Thread Tom de Vries

On 12-11-14 11:00, Tom de Vries wrote:

Jakub,

this patch fixes a gcc_assert in expand_omp_for_static_chunk.

The assert follows a loop with composite loop condition:
...
   vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
   ene = single_succ_edge (entry_bb);

   psi = gsi_start_phis (fin_bb);
   for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
        gsi_next (&psi), ++i)
...

AFAIU, the intention of the assert is that it tries to check that both:
- all phis have been handled (gsi_end_p (psi)), and
- all elements of head have been used (head->length () == i).
In other words, that we have stopped iterating because both loop conditions are
false.

The current assert checks that *not* all phis have been handled:
...
   gcc_assert (!gsi_end_p (psi) && i == head->length ());
...

Looking back in the history, it seems we started out with the 'all phis handled'
semantics, but I suspect that that got lost due to a typo:
...
79acaae1 2007-09-07
   gcc_assert (!phi && !args);

75a70cf95 2008-07-28
   gcc_assert (!gsi_end_p (psi) && i == VEC_length (edge_var_map, head));

f1f41a6c 2012-11-18
   gcc_assert (!gsi_end_p (psi) && i == head->length ());
...

Now, if the current assert is incorrect, why didn't it trigger?

The assert is in ssa-handling code in expand_omp_for_static_chunk. Ssa-handling
code in omp-low.c is only triggered by pass_parallelize_loops, and that pass
doesn't specify a chunk size on the GIMPLE_OMP_FOR it constructs, so that will
only call expand_omp_for_static_nochunk.

I managed to trigger this assert in my oacc kernels directive patch set (on top
of the gomp-4_0-branch), which constructs an oacc for loop in
pass_parallelize_loops, and then this code in gomp-4_0-branch has the effect
that we trigger expand_omp_for_static_chunk:
...
//TODO
   /* For OpenACC loops, force a chunk size of one, as this avoids the default
  scheduling where several subsequent iterations are being executed by the
  same thread.  */
   if (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
 {
   gcc_assert (fd->chunk_size == NULL_TREE);
   fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
 }
...

So, AFAIU, this assert (and associated ssa-handling code in
expand_omp_for_static_chunk) is dead on trunk, but I'm exercising the code
currently in my patch series, so I'd prefer to fix it rather than remove it.

Bootstrapped and reg-tested on x86_64, on top of trunk, gomp-4_0-branch and
internal oacc dev branch.

OK for trunk?



Ping.

Thanks,
- Tom


0001-Fix-gcc_assert-in-expand_omp_for_static_chunk.patch


2014-11-12  Tom de Vries  t...@codesourcery.com

* omp-low.c (expand_omp_for_static_chunk): Fix assert.
---
  gcc/omp-low.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b59d069..5210de1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6775,7 +6775,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
  locus = redirect_edge_var_map_location (vm);
  add_phi_arg (nphi, redirect_edge_var_map_def (vm), re, locus);
}
-  gcc_assert (!gsi_end_p (psi) && i == head->length ());
+  gcc_assert (gsi_end_p (psi) && i == head->length ());
redirect_edge_var_map_clear (re);
while (1)
{
-- 1.9.1





Re: Check that unlinked uses do not contain ssa-names when renaming.

2014-10-16 Thread Tom de Vries
On 08/10/12 11:24, Richard Guenther wrote:
 On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote:
 Richard,

 attached patch checks that unlinked uses do not contain ssa-names when 
 renaming.

 This assert triggers when compiling (without the fix) the PR54735 example.

 AFAIU, it was due to chance that we caught the PR54735 bug by hitting the
 verification failure, because the new vdef introduced by renaming happened 
 to be
 the same name as the ssa name referenced in the invalid unlinked use (in 
 terms
 of maybe_replace_use: rdef == use).

 The assert from this patch catches all cases that an unlinked use contains an
 ssa-name.

 Bootstrapped and reg-tested on x86_64 (Ada inclusive).

 OK for trunk?
 
 I don't think that is exactly what we should assert here ... (I thought about
 adding checking myself ...).  What we'd want to assert is that before
 any new DEF is registered (which may re-allocate an SSA name) that
 no uses with SSA_NAME_IN_FREELIST appear.  Thus, a light verification
 pass would be necessary at the beginning of update_ssa
 (which I queued onto my TODO list ...).  We'd want that anyway to for
 example catch the case where a non-virtual operand is partially renamed.
 

Richard,

while developing a patch, I ran into the same 'no immediate_use list'
verification error again, caused by an unlinked use containing an ssa-name.

The verification error was caused by an error in my patch, but triggered by
chance, by an unrelated change in the patch.

I've tried to implement the 'light verification pass' you describe above, and
I've checked that the error in my patch is found, also when I remove the trigger
for the verification error from my patch.

Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding
removed, in order to ensure the code is active).

OK for trunk?

Thanks,
- Tom


2014-10-16  Tom de Vries  t...@codesourcery.com

	* tree-into-ssa.c (update_ssa): Assert that there's no ssa use operand
	with SSA_NAME_IN_FREELIST.

diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index 01203de..227d5bb 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
update_ssa (unsigned update_flags)
 
   timevar_push (TV_TREE_SSA_INCREMENTAL);
 
+#ifdef ENABLE_CHECKING
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi))
+	{
+	  gimple stmt = gsi_stmt (gsi);
+
+	  ssa_op_iter i;
+	  use_operand_p use_p;
+	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_ALL_USES)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  if (TREE_CODE (use) != SSA_NAME)
+		continue;
+
+	  gcc_assert (!SSA_NAME_IN_FREE_LIST (use));
+	}
+	}
+}
+#endif
+
   if (dump_file && (dump_flags & TDF_DETAILS))
 fprintf (dump_file, "\nUpdating SSA:\n");
 
-- 
1.9.1



[PATCH, PR61605, 1/2] Handle copy cycles in pass_cprop_hardreg

2014-10-16 Thread Tom de Vries

Eric,

this patch is the first half of the fix for PR61605.

The problem it addresses is the following: Consider this copy cycle (a = b; b = 
a):
...
(insn 2 18 3 2 (set (reg/v:SI 1 dx [orig:86 yD.1749 ] [86])
(reg:SI 5 di [ yD.1749 ])) test.c:9 90 {*movsi_internal}
 (expr_list:REG_DEAD (reg:SI 5 di [ yD.1749 ])
(nil)))
(note 3 2 6 2 NOTE_INSN_FUNCTION_BEG)
(insn 6 3 7 2 (set (reg:SI 5 di)
(reg/v:SI 1 dx [orig:86 yD.1749 ] [86])) test.c:10 90 {*movsi_internal}
 (nil))
...

cprop_hardreg handles this currently in the following way:
- it processes the first copy, and sets up di as representant of dx.
- it then processes the second copy, and replaces the dx with di:
  ...
  (insn 6 3 7 2 (set (reg:SI 5 di)
(reg:SI 5 di [orig:86 yD.1749 ] [86])) test.c:10 90 {*movsi_internal}
 (nil))
  ...
  turning it into a noop.

pass_fast_rtl_dce subsequently removes the noop.

However, while processing the second copy, it considers the set of di in insn 6 
as killing, and removes di as representant of dx. So a use of dx in a following 
insn is not replaced by di.


By running pass_cprop_hardreg once more after pass_fast_rtl_dce, we do manage to 
replace the use of dx in a following insn by di.


This patch achieves the same, without rerunning pass_cprop_hardreg. It ensures 
in copyprop_hardreg_forward_1 that the set of a dest by a noop is not considered 
killing.


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
2014-10-13  Tom de Vries  t...@codesourcery.com

	PR rtl-optimization/61605
	* regcprop.c (copyprop_hardreg_forward_1): Add copy_p and noop_p.  Don't
	notice stores for noops.  Don't regard noops as copies.

diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index 3297721..c71de98 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -1032,12 +1032,21 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd)
 	note_stores (PATTERN (insn), kill_clobbered_value, vd);
 	}
 
-  /* Notice stores.  */
-  note_stores (PATTERN (insn), kill_set_value, &ksvd);
+  bool copy_p = (set
+		  && REG_P (SET_DEST (set))
+		  && REG_P (SET_SRC (set)));
+  bool noop_p = (copy_p
+		  && rtx_equal_p (SET_DEST (set), SET_SRC (set)));
 
-  /* Notice copies.  */
-  if (set && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
-	copy_value (SET_DEST (set), SET_SRC (set), vd);
+  if (!noop_p)
+	{
+	  /* Notice stores.  */
+	  note_stores (PATTERN (insn), kill_set_value, &ksvd);
+
+	  /* Notice copies.  */
+	  if (copy_p)
+	copy_value (SET_DEST (set), SET_SRC (set), vd);
+	}
 
   if (insn == BB_END (bb))
 	break;
-- 
1.9.1



Re: Check that unlinked uses do not contain ssa-names when renaming.

2014-10-16 Thread Tom de Vries

On 16-10-14 10:14, Richard Biener wrote:

On Thu, Oct 16, 2014 at 9:20 AM, Tom de Vries tom_devr...@mentor.com wrote:

On 08/10/12 11:24, Richard Guenther wrote:

On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote:

Richard,

attached patch checks that unlinked uses do not contain ssa-names when renaming.

This assert triggers when compiling (without the fix) the PR54735 example.

AFAIU, it was due to chance that we caught the PR54735 bug by hitting the
verification failure, because the new vdef introduced by renaming happened to be
the same name as the ssa name referenced in the invalid unlinked use (in terms
of maybe_replace_use: rdef == use).

The assert from this patch catches all cases that an unlinked use contains an
ssa-name.

Bootstrapped and reg-tested on x86_64 (Ada inclusive).

OK for trunk?


I don't think that is exactly what we should assert here ... (I thought about
adding checking myself ...).  What we'd want to assert is that before
any new DEF is registered (which may re-allocate an SSA name) that
no uses with SSA_NAME_IN_FREELIST appear.  Thus, a light verification
pass would be necessary at the beginning of update_ssa
(which I queued onto my TODO list ...).  We'd want that anyway to for
example catch the case where a non-virtual operand is partially renamed.



Richard,

while developing a patch, I ran into the same 'no immediate_use list'
verification error again, caused by an unlinked use containing an ssa-name.

The verification error was caused by an error in my patch, but triggered by
chance, by an unrelated change in the patch.

I've tried to implement the 'light verification pass' you describe above, and
I've checked that the error in my patch is found, also when I remove the trigger
for the verification error from my patch.

Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding
removed, in order to ensure the code is active).

OK for trunk?


Ok with changing the gcc_assert to

   if (SSA_NAME_IN_FREE_LIST (use))
 {
error ("statement uses released SSA name");
debug_gimple_stmt (stmt);
err = true;
 }

and after checking all stmts

   if (err)
 internal_error ("cannot update SSA form");

you might want to push/pop TV_TREE_STMT_VERIFY around all this
as well.



Richard,

I've implemented the changes listed above, and also made the message a bit more 
verbose:

...
kernels-2.c: In function ‘main’:
kernels-2.c:41:5: error: statement uses released SSA name
 for (COUNTERTYPE ii = 0; ii < N; ii++)
 ^
# .MEM_57 = VDEF <.MEM_79>
.omp_data_arr.10 ={v} {CLOBBER};
The use of .MEM_79 should have been replaced or marked for renaming
kernels-2.c:41:5: internal compiler error: cannot update SSA form
...

I've added mentioning the specific use that has the problem, since it will not 
always be evident which is the one with the problem.


OK for trunk?

If that's too verbose I can also implement instead:
...
kernels-2.c:41:5: error: statement uses released SSA name .MEM_79
...

Thanks,
- Tom

2014-10-16  Tom de Vries  t...@codesourcery.com

	* tree-into-ssa.c (update_ssa): Assert that there's no ssa use operand
	with SSA_NAME_IN_FREELIST.

diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index 01203de..dcfba3c 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -3161,6 +3161,47 @@ update_ssa (unsigned update_flags)
   if (!need_ssa_update_p (cfun))
 return;
 
+#ifdef ENABLE_CHECKING
+  timevar_push (TV_TREE_STMT_VERIFY);
+
+  bool err = false;
+
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi))
+	{
+	  gimple stmt = gsi_stmt (gsi);
+
+	  ssa_op_iter i;
+	  use_operand_p use_p;
+	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_ALL_USES)
+	{
+	  tree use = USE_FROM_PTR (use_p);
+	  if (TREE_CODE (use) != SSA_NAME)
+		continue;
+
+	  if (SSA_NAME_IN_FREE_LIST (use))
+		{
+		  error ("statement uses released SSA name:");
+		  debug_gimple_stmt (stmt);
+		  fprintf (stderr, "The use of ");
+		  print_generic_expr (stderr, use, 0);
+		  fprintf (stderr,
+			   " should have been replaced or marked for renaming"
+			   "\n");
+		  err = true;
+		}
+	}
+	}
+}
+
+  if (err)
+internal_error ("cannot update SSA form");
+
+  timevar_pop (TV_TREE_STMT_VERIFY);
+#endif
+
   timevar_push (TV_TREE_SSA_INCREMENTAL);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-- 
1.9.1



Re: Check that unlinked uses do not contain ssa-names when renaming.

2014-10-16 Thread Tom de Vries

On 16-10-14 14:20, Richard Biener wrote:

Richard,

I've implemented the changes listed above, and also made the message a bit
more verbose:
...
kernels-2.c: In function ‘main’:
kernels-2.c:41:5: error: statement uses released SSA name
  for (COUNTERTYPE ii = 0; ii < N; ii++)
  ^
# .MEM_57 = VDEF <.MEM_79>
.omp_data_arr.10 ={v} {CLOBBER};
The use of .MEM_79 should have been replaced or marked for renaming

^^^ or marked for renaming is not correct, only replacing is



I've checked in the version with should have been replaced.

I was trying to mention both possibilities that you mentioned here: 
https://gcc.gnu.org/ml/gcc-patches/2011-10/msg00977.html:

...
Whoever unlinks the vuse (by removing its definition) has to replace it with 
something valid, which is either the bare symbol .MEM, or the VUSE associated 
with the removed VDEF (thus, as unlink_stmt_vdef does).

...

So, I hope better formulated this time, what I intended to state was:
...
The use of .MEM_79 should have been replaced with either the underlying symbol 
or a valid SSA name.

...

But perhaps that's not generally valid? I've browsed tree-into-ssa.c a bit, and 
I only find the 'replace with underlying symbol' for virtual operands.


Thanks,
- Tom



Re: -fuse-caller-save - Collect register usage information

2014-10-16 Thread Tom de Vries

On 11-10-14 12:46, Eric Botcazou wrote:

So, I hate the name of the option, and the documentation seems wrong to me.
It doesn’t use the caller saved registers for allocation, it uses the call
clobbered registers for allocation.  Or, one could say it uses the callee
saved registers for allocation.




Eric,
Mike,

thanks for the comments.

I know the following definition of caller-vs-callee saved registers:
- at function entry, a caller-save register can be used immediately, without
  needing to save it first. However, in order to store a value in it that is
  live across calls, it needs to be saved and restored around each call (hence
  the term caller-save).
- at function entry, a callee-save register first needs to be saved before it
  can be used (hence the name callee-save). However, that means that it can be
  used to store a value that is live across calls, without further need for
  saving/restoring.

So, AFAIU, call clobbered corresponds with caller-save, not with callee saved. 
So I'd say the documentation is in fact correct.


Having said that, in my mind, what is confusing about the name 
-fuse-caller-save, is that in fact the caller-save registers are already used in 
register allocation. It's just that they're used across calls without the need 
to save them, but -fuse-caller-save-across-calls-without-saving-if-possible is 
not such a good option name.


Another thing that - in my mind - is confusing is that there's an option 
fcaller-saves which controls behaviour for caller-save registers:

- for -fno-caller-saves, caller-save registers are not used across calls
- for -fcaller-saves, caller-save registers are used across calls
The name is similar to -fuse-caller-save, and it won't be clear from just the 
names what the difference is.



Seconded, the description is a bit confusing and caller saved/callee saved
should be avoided IMO, call clobbered/call saved is much clearer.



I have no objection to go with another terminology in the documentation. But 
before going into a patch, let's settle on the option name.


As for the name, I'm not sure just changing terminology will make things 
clearer, in other words, I'm not sure fuse-call-clobbered is any clearer than 
fuse-caller-save.


I've pondered the name -fipa-ira, but I rejected that earlier because that might 
suggest actual register allocation at the interprocedural scope, while this is 
only register allocation at the scope of a single procedure, taking some 
interprocedural information into account. Furthermore, it's not only ira that 
uses the interprocedural information.


So, let's generate a list of option names.
-fuse-caller-save
-fuse-call-clobbered
-fprecise-call-clobbers
-foptimize-call-clobbers
-fprune-call-clobbers
-freduce-call-clobbers
-fcall-clobbers-ipa

Any preferences, alternatives?

Thanks,
- Tom



Re: -fuse-caller-save - Collect register usage information

2014-10-17 Thread Tom de Vries

On 16-10-14 23:46, Eric Botcazou wrote:

Having said that, in my mind, what is confusing about the name
-fuse-caller-save, is that in fact the caller-save registers are already
used in register allocation. It's just that they're used across calls
without the need to save them, but
-fuse-caller-save-across-calls-without-saving-if-possible is not such a
good option name.


Agreed.


Another thing that - in my mind - is confusing is that there's an option
fcaller-saves which controls behaviour for caller-save registers:
- for -fno-caller-saves, caller-save registers are not used across calls
- for -fcaller-saves, caller-save registers are used across calls
The name is similar to -fuse-caller-save, and it won't be clear from just
the names what the difference is.


OK, so the existing -fcaller-saves is in fact -fuse-caller-saves,


Right, in the sense that a caller-save is the save of caller-save register, as 
opposed to short for a caller-save register, which is how it's used in 
-fuse-caller-save.



which means
that we should really find a better name for yours. :-)



Agreed :)


I've pondered the name -fipa-ira, but I rejected that earlier because that
might suggest actual register allocation at the interprocedural scope,
while this is only register allocation at the scope of a single procedure,
taking some interprocedural information into account. Furthermore, it's not
only ira that uses the interprocedural information.

So, let's generate a list of option names.
-fuse-caller-save
-fuse-call-clobbered
-fprecise-call-clobbers
-foptimize-call-clobbers
-fprune-call-clobbers
-freduce-call-clobbers
-fcall-clobbers-ipa

Any preferences, alternatives?


Given the existing -fcaller-saves, I'd keep caller-saves in the name, so
something along the lines of -foptimize-caller-saves or -fipa-caller-saves.



Let's look at the effect of the option (after the recent fix for PR61605) on 
gcc.target/i386/fuse-caller-save.c:

...
 foo:
 .LFB1:
.cfi_startproc
-   pushq   %rbx
-   .cfi_def_cfa_offset 16
-   .cfi_offset 3, -16
-   movl    %edi, %ebx
    call    bar
-   addl    %ebx, %eax
-   popq    %rbx
-   .cfi_def_cfa_offset 8
+   addl    %edi, %eax
ret
.cfi_endproc
 .LFE1:
...
So, the effect is: instead of using a callee-save register, we use a caller-save 
register to store a value that's live over a call, without needing to add a 
caller-save, as would be normally the case.


If I see an option -foptimize-caller-saves, I'd expect the effect to be that 
without, there are some caller-saves and with, there are less. This is not the 
case in the diff above. Nevertheless, if we'd have a case where we already have 
caller-saves, that would be indeed the observed effect. I'm just trying to point 
out that the optimization does more than just removing caller-saves.


The optimization, at its core, can be regarded as removing superfluous clobbers 
from calls, and everything else is derived from that:

- if a caller-save register is not clobbered by a call, then there's no need
  for a caller-save before that call, so it's cheaper to use across that call
  than a callee-save register.
  (which explains what we see in the diff)
- if a caller-save register is live across a call, and is not clobbered by a
  call, then there's no need for a caller-save, and it can be removed.
  (which explains what we see in case we have an example where there are
   actual caller-saves without the optimization, and less so with the
   optimization)

I'm starting to lean towards -foptimize-call-clobbers or similar.

Thanks,
- Tom


Re: -fuse-caller-save - Collect register usage information

2014-10-19 Thread Tom de Vries

On 17-10-14 21:24, Eric Botcazou wrote:

Let's look at the effect of the option (after the recent fix for PR61605) on
gcc.target/i386/fuse-caller-save.c:
...
   foo:
   .LFB1:
.cfi_startproc
-   pushq   %rbx
-   .cfi_def_cfa_offset 16
-   .cfi_offset 3, -16
-   movl%edi, %ebx
callbar
-   addl%ebx, %eax
-   popq%rbx
-   .cfi_def_cfa_offset 8
+   addl%edi, %eax
ret
.cfi_endproc
   .LFE1:
...
So, the effect is: instead of using a callee-save register, we use a
caller-save register to store a value that's live over a call, without
needing to add a caller-save, as would be normally the case.

If I see an option -foptimize-caller-saves, I'd expect the effect to be that
without, there are some caller-saves and with, there are less. This is not
the case in the diff above.


To me it is, movl %edi, %ebx/addl %ebx, %eax is a caller-save/restore.



I agree that it can look like that. But the insn 'movl %edi, %ebx' is generated 
by assign_parm_setup_reg at expand. AFAIU, the purpose is to decouple the value 
of the argument and its uses from the register it's passed in.


The definition of -fcaller-saves below explains why insn 'movl %edi, %ebx' is 
not a caller-save: because it's not generated before a call, but rather at the 
start of a function. This seems to be confirmed by the fact that the insn 'movl 
%edi, %ebx' is still generated with -fno-caller-saves.



I'm starting to lean towards -foptimize-call-clobbers or similar.


Yes, that's also a good name and was my initial preference.  But you pointed
out the existing -fcaller-saves:

`-fcaller-saves'
  Enable allocation of values to registers that are clobbered by
  function calls, by emitting extra instructions to save and restore
  the registers around such calls.  Such allocation is done only
  when it seems to result in better code.

so -foptimize-caller-saves can be understood as optimizing out the extra
instructions to save and restore  the registers around such calls  and, thus,
as having a direct relationship with -fcaller-saves.


Agree.

But, given the preference of a number of others for fipa-ra, could you live with 
that?


Thanks,
- Tom


Re: [PATCH] register CALL_INSN_FUNCTION_USAGE in find_all_hard_reg_sets

2014-04-16 Thread Tom de Vries

On 16-01-14 09:13, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

* The set of registers which are clobbered during a call by things like the plt
- these are not picked up by the use-caller-save optimization. We need the
hook to inform the compiler about these registers


Right, but...


* And finally, registers clobbered in the caller itself during a sequence of
instructions implementing a function call. On mips, that's R6, which may be
clobbered by the call. Normally that doesn't need mentioning in the RTL 
since
it's a call_used_reg, but since use-caller-save might discover a set of
registers for the called function that does not include R6, it becomes
important to record this clobber explicitly. It could be represented in the
RTL by a clobber on the insn, or a clobber in C_I_F_U. Or it could just be
part of the registers returned by the hook - but that was previously deemed
not acceptable (and it doesn't match the description of the hook).


...why do we need two different mechanisms to deal with these two?
IMO the set recorded for the callee should contain what the callee
instructions clobber and nothing else.  CALL_INSN_FUNCTION_USAGE
should contain everything clobbered by a call outside the callee,
whether that's in the calling function itself, in a PLT, in a MIPS16
stub, or whatever.



Richard,

Is this what you mean?

This patch introduces a hook that specifies which registers are implicitly 
clobbered by a call, not including the registers that are clobbered in the 
called function, and then uses that hook to add those registers to 
CALL_INSN_FUNCTION_USAGE.


Thanks,
- Tom

2013-04-29  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

* target.def (call_clobbered_regs): New DEFHOOK.
* doc/tm.texi.in (@node Stack and Calling): Add Miscellaneous Register
Hooks to @menu.
(@node Miscellaneous Register Hooks): New node.
(@hook TARGET_CALL_CLOBBERED_REGS): New hook.
* doc/tm.texi: Regenerate.
* calls.c (expand_call, emit_library_call_value_1): Add regs in
targetm.call_clobbered_regs to CALL_INSN_FUNCTION_USAGE.

diff --git a/gcc/calls.c b/gcc/calls.c
index e798c7a..edee262 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3191,6 +3191,27 @@ expand_call (tree exp, rtx target, int ignore)
 	  add_reg_note (last, REG_CALL_DECL, datum);
 	}
 
+  if (targetm.call_clobbered_regs != NULL)
+	{
+	  struct hard_reg_set_container call_clobbered_regs;
+	  rtx last = last_call_insn ();
+
+	  CLEAR_HARD_REG_SET (call_clobbered_regs.set);
+	  if (targetm.call_clobbered_regs (fndecl, call_clobbered_regs))
+	{
+	  hard_reg_set_iterator hrsi;
+	  unsigned int i;
+	  EXECUTE_IF_SET_IN_HARD_REG_SET (call_clobbered_regs.set, 0, i, hrsi)
+		{
+		  rtx reg = gen_rtx_REG (word_mode, i);
+		  CALL_INSN_FUNCTION_USAGE (last)
+		= gen_rtx_EXPR_LIST (VOIDmode,
+	 gen_rtx_CLOBBER (VOIDmode, reg),
+	 CALL_INSN_FUNCTION_USAGE (last));
+		}
+	}
+	}
+
   /* If the call setup or the call itself overlaps with anything
 	 of the argument setup we probably clobbered our call address.
 	 In that case we can't do sibcalls.  */
@@ -4226,6 +4247,27 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
   add_reg_note (last, REG_CALL_DECL, datum);
 }
 
+  if (targetm.call_clobbered_regs != NULL)
+{
+  struct hard_reg_set_container call_clobbered_regs;
+  rtx last = last_call_insn ();
+
+  CLEAR_HARD_REG_SET (call_clobbered_regs.set);
+  if (targetm.call_clobbered_regs (fndecl, call_clobbered_regs))
+	{
+	  hard_reg_set_iterator hrsi;
+	  unsigned int i;
+	  EXECUTE_IF_SET_IN_HARD_REG_SET (call_clobbered_regs.set, 0, i, hrsi)
+	{
+	  rtx reg = gen_rtx_REG (word_mode, i);
+	  CALL_INSN_FUNCTION_USAGE (last)
+		= gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_CLOBBER (VOIDmode, reg),
+ CALL_INSN_FUNCTION_USAGE (last));
+	}
+	}
+}
+
   /* Right-shift returned value if necessary.  */
   if (!pcc_struct_value
       && TYPE_MODE (tfom) != BLKmode
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index b8ca17e..cd52f73 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3091,6 +3091,7 @@ This describes the stack layout and calling conventions.
 * Profiling::
 * Tail Calls::
 * Stack Smashing Protection::
+* Miscellaneous Register Hooks::
 @end menu
 
 @node Frame Layout
@@ -5016,6 +5017,14 @@ normally defined in @file{libgcc2.c}.
 Whether this target supports splitting the stack when the options described in @var{opts} have been passed.  This is called after options have been parsed, so the target may reject splitting the stack in some configurations.  The default version of this hook returns false.  If @var{report} is true, this function may issue a warning or error; if @var{report} is false, it must simply return a value
 @end deftypefn
 
+@node Miscellaneous Register Hooks

[PING] [PATCH] register CALL_INSN_FUNCTION_USAGE in find_all_hard_reg_sets

2014-04-16 Thread Tom de Vries

On 15-01-14 17:53, Tom de Vries wrote:

Eric,

This patch adds scanning of clobbers in CALL_INSN_FUNCTION_USAGE to
find_all_hard_reg_sets.

For MIPS, calls are split at some point. After the split, one of the resulting
insns may clobber $6. But before the split, that's not explicit in the rtl
representation of the unsplit call.

For -fuse-caller-save, that's a problem, and Richard S. suggested to add the
clobber of $6 to the CALL_INSN_FUNCTION_USAGE of the unsplit call.

I wrote a patch for that (
http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00730.html ), but found that doing
so did not fix the problem with -fuse-caller-save, because
find_all_hard_reg_sets (the mechanism -fuse-caller-save uses to detect which
registers are set or clobbered) does not take CALL_INSN_FUNCTION_USAGE into
account. This patch fixes that.

Built and reg-tested on MIPS.

OK for stage1 if x86_64 bootstrap and reg-test succeed?



Eric,

Ping of this ( http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00888.html ) patch.

Ok for stage1?

Thanks,
- Tom



Re: [PATCH] register CALL_INSN_FUNCTION_USAGE in find_all_hard_reg_sets

2014-04-16 Thread Tom de Vries
On 16/04/14 12:28, Richard Sandiford wrote:
  This patch introduces a hook that specifies which registers are implicitly 
  clobbered by a call, not including the registers that are clobbered in the 
  called function, and then uses that hook to add those registers to 
  CALL_INSN_FUNCTION_USAGE.

 I don't think a new hook is needed.

Richard,

the hook enables us to determine whether a target supplies the information
provided by the hook. If the target does not provide this information, the
fuse-caller-save optimization is possibly not safe.

How do you propose to handle this without this hook?

Apart from that, I don't see the reason why we should add similar code to
several targets, if we can add a hook that specifies information about the
target, and add generic code that handles the information.

Thanks,
- Tom



fuse-caller-save - hook format

2014-04-16 Thread Tom de Vries
Vladimir,

All patches for the fuse-caller-save optimization have been ok-ed. The only part
not approved is the MIPS-specific part.

The objection of Richard S. is not so much the patch itself, but more the idea
of the hook fn_other_hard_reg_usage.

For clarity, I'm restating the current hook definition here:
...
+@deftypefn {Target Hook} bool TARGET_FN_OTHER_HARD_REG_USAGE (struct
hard_reg_set_container *@var{regs})
Add any hard registers to @var{regs} that are set or clobbered by a call to the
function.  This hook only needs to add registers that cannot be found by
examination of the final RTL representation of a function.  This hook returns
true if it managed to determine which registers need to be added.  The default
version of this hook returns false.
...

Richard prefers to, rather than having a hook specifying what registers are
implicitly clobbered, adding those clobbers to CALL_INSN_FUNCTION_USAGE.

I can see these possibilities (and perhaps there are more):

1. We go with Richard's proposal: we make each target responsible for adding
these clobbers in CALL_INSN_FUNCTION_USAGE, and use a hook called f.i.
targetm.fuse_caller_save_p or targetm.implicit_call_clobbers_in_fusage_p, to
indicate whether a target has taken care of that, meaning it's safe to do the
fuse-caller-save optimization.

2. A mixed solution: we make each target responsible for specifying which
clobbers need to be added in CALL_INSN_FUNCTION_USAGE, using a hook called f.i.
targetm.call_clobbered_regs, and add generic code to add those clobbers to
CALL_INSN_FUNCTION_USAGE.

3. We stick with the current, approved hook format, and try to convince Richard
to live with it.


Since you are a register allocator maintainer, familiar with the
fuse-caller-save optimization, and have approved the original hook, I would like
to ask you to make a decision on how to proceed from here.

Thanks,
- Tom


Re: fuse-caller-save - hook format

2014-04-22 Thread Tom de Vries

On 17-04-14 18:49, Vladimir Makarov wrote:

I see.  I guess your proposed solution is ok then.


Vladimir,
Richard,

I've updated the fuse-caller-save patch series to model non-callee call clobbers 
in CALL_INSN_FUNCTION_USAGE.


There are 2 new hooks:

1. call_fusage_contains_non_callee_clobbers.
A hook to indicate whether a target has added the non-callee call clobbers to 
CALL_INSN_FUNCTION_USAGE, meaning it's safe to do the fuse-caller-save optimization.


2. post_expand_call_insn.
A utility hook to facilitate adding the clobbers to CALL_INSN_FUNCTION_USAGE.

I've bootstrapped and reg-tested on x86_64, and I've built and reg-tested on 
MIPS.

The series now looks like:

 1   -fuse-caller-save - Add command line option
 2   -fuse-caller-save - Add new reg-note REG_CALL_DECL
 3   Add implicit parameter to find_all_hard_reg_sets
 4   Register CALL_INSN_FUNCTION_USAGE in find_all_hard_reg_sets
 5   Add call_fusage_contains_non_callee_clobbers hook
 6   -fuse-caller-save - Collect register usage information
 7   -fuse-caller-save - Use collected register usage information
 8   -fuse-caller-save - Enable by default at O2 and higher
 9   -fuse-caller-save - Add documentation
10   -fuse-caller-save - Add test-case
11   Add post_expand_call_insn hook
12   Add clobber_reg
13   -fuse-caller-save - Enable for MIPS
14   -fuse-caller-save - Enable for ARM
15   -fuse-caller-save - Enable for AArch64
16   -fuse-caller-save - Support in lra
17   -fuse-caller-save - Enable for i386

The submission/approval status is:
1-3, 7-10, 16: approved
4: submitted, pinged Eric Botcazou 16-04-2014
5, 11: new hook, need to submit
6, 14-15: approved earlier, but need to resubmit due to updated hook
12: new utility patch, need to submit
13: need to resubmit due to updated hook
17: need to submit

I'll post the patches that need (re)submitting.

Thanks,
- Tom


Re: fuse-caller-save - hook format

2014-04-22 Thread Tom de Vries

On 22-04-14 17:27, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

2. post_expand_call_insn.
A utility hook to facilitate adding the clobbers to CALL_INSN_FUNCTION_USAGE.


Why is this needed though?  Like I say, I think targets should update
CALL_INSN_FUNCTION_USAGE when emitting calls as part of the call expander.
Splitting the functionality of the call expanders across the define_expand
and a new hook just makes things unnecessarily complicated IMO.



Richard,

It is not needed, but it is convenient.

There are targets where the define_expands for calls use the rtl template. 
Having to add clobbers to the CALL_INSN_FUNCTION_USAGE for such a target means 
you cannot use the rtl template any more and instead need to generate all needed 
RTL insns in C code.


This hook means that you can keep using the rtl template, which is less 
intrusive for those targets.


Thanks,
- Tom


Thanks,
Richard





Add call_fusage_contains_non_callee_clobbers hook

2014-04-22 Thread Tom de Vries

On 22-04-14 17:05, Tom de Vries wrote:

I've updated the fuse-caller-save patch series to model non-callee call clobbers
in CALL_INSN_FUNCTION_USAGE.



Vladimir,

This patch adds a hook to indicate whether a target has added the non-callee 
call clobbers to CALL_INSN_FUNCTION_USAGE, meaning it's safe to do the 
fuse-caller-save optimization.


OK for trunk?

Thanks,
- Tom

2013-04-29  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

* target.def (call_fusage_contains_non_callee_clobbers): New DEFHOOK.
* doc/tm.texi.in (@node Stack and Calling): Add Miscellaneous Register
Hooks to @menu.
(@node Miscellaneous Register Hooks): New node.
(@hook TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): New hook.
* doc/tm.texi: Regenerate.

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index b8ca17e..8af8efd 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3091,6 +3091,7 @@ This describes the stack layout and calling conventions.
 * Profiling::
 * Tail Calls::
 * Stack Smashing Protection::
+* Miscellaneous Register Hooks::
 @end menu
 
 @node Frame Layout
@@ -5016,6 +5017,21 @@ normally defined in @file{libgcc2.c}.
 Whether this target supports splitting the stack when the options described in @var{opts} have been passed.  This is called after options have been parsed, so the target may reject splitting the stack in some configurations.  The default version of this hook returns false.  If @var{report} is true, this function may issue a warning or error; if @var{report} is false, it must simply return a value
 @end deftypefn
 
+@node Miscellaneous Register Hooks
+@subsection Miscellaneous register hooks
+@cindex miscellaneous register hooks
+
+@deftypefn {Target Hook} bool TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS (void)
+Return true if all the calls in the current function contain clobbers in
+CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call
+rather than by the callee, and are not already set or clobbered in the call
+pattern.  Examples of such registers are registers used in PLTs and stubs,
+and temporary registers used in the call instruction but not present in the
+rtl pattern.  Another way to formulate it is the registers not present in the
+rtl pattern that are clobbered by the call assuming the callee does not
+clobber any register.  The default version of this hook returns false.
+@end deftypefn
+
 @node Varargs
 @section Implementing the Varargs Macros
 @cindex varargs implementation
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d793d26..8991c3c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2720,6 +2720,7 @@ This describes the stack layout and calling conventions.
 * Profiling::
 * Tail Calls::
 * Stack Smashing Protection::
+* Miscellaneous Register Hooks::
 @end menu
 
 @node Frame Layout
@@ -3985,6 +3986,12 @@ the function prologue.  Normally, the profiling code comes after.
 
 @hook TARGET_SUPPORTS_SPLIT_STACK
 
+@node Miscellaneous Register Hooks
+@subsection Miscellaneous register hooks
+@cindex miscellaneous register hooks
+
+@hook TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+
 @node Varargs
 @section Implementing the Varargs Macros
 @cindex varargs implementation
diff --git a/gcc/target.def b/gcc/target.def
index 3a64cd1..ae0bc9c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5130,6 +5130,22 @@ FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, and the PIC_OFFSET_TABLE_REGNUM.,
  void, (bitmap regs),
  hook_void_bitmap)
 
+/* Targets should define this target hook to mark that non-callee clobbers are
+   present in CALL_INSN_FUNCTION_USAGE for all the calls in the current
+   function.  */
+DEFHOOK
+(call_fusage_contains_non_callee_clobbers,
+ "Return true if all the calls in the current function contain clobbers in\n\
+CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call\n\
+rather than by the callee, and are not already set or clobbered in the call\n\
+pattern.  Examples of such registers are registers used in PLTs and stubs,\n\
+and temporary registers used in the call instruction but not present in the\n\
+rtl pattern.  Another way to formulate it is the registers not present in the\n\
+rtl pattern that are clobbered by the call assuming the callee does not\n\
+clobber any register.  The default version of this hook returns false.",
+ bool, (void),
+ hook_bool_void_false)
+
 /* Fill in additional registers set up by prologue into a regset.  */
 DEFHOOK
 (set_up_by_prologue,


Re: fuse-caller-save - hook format

2014-04-22 Thread Tom de Vries

On 22-04-14 18:18, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:


On 22-04-14 17:27, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

2. post_expand_call_insn.
A utility hook to facilitate adding the clobbers to CALL_INSN_FUNCTION_USAGE.


Why is this needed though?  Like I say, I think targets should update
CALL_INSN_FUNCTION_USAGE when emitting calls as part of the call expander.
Splitting the functionality of the call expanders across the define_expand
and a new hook just makes things unnecessarily complicated IMO.



Richard,

It is not needed, but it is convenient.

There are targets where the define_expands for calls use the rtl template.
Having to add clobbers to the CALL_INSN_FUNCTION_USAGE for such a target means
you cannot use the rtl template any more and instead need to generate
all needed
RTL insns in C code.

This hook means that you can keep using the rtl template, which is less
intrusive for those targets.




[ switching order of questions ]

Which target do you have in mind?


Aarch64.

 But if the target is simple enough to use a single call pattern for call
 cases, wouldn't it be possible to add the clobber directly to the call
 pattern?

I think that can be done, but that feels intrusive as well. I thought the reason 
that we added these clobbers to CALL_INSN_FUNCTION_USAGE was exactly because we 
did not want to add them to the rtl patterns?


But, if the maintainer is fine with that, so am I.

Richard Earnshaw,

are you ok with adding the IP0_REGNUM/IP1_REGNUM clobbers to all the call 
patterns in the Aarch64 target?


The alternatives are:
- rewrite the call expansions not to use the rtl templates, and add the clobbers
  there to CALL_INSN_FUNCTION_USAGE
- get the post_expand_call_insn hook approved and use that to add the clobbers
  to CALL_INSN_FUNCTION_USAGE.

what is your preference?

Thanks,
- Tom




Re: [PING] [PATCH] register CALL_INSN_FUNCTION_USAGE in find_all_hard_reg_sets

2014-04-22 Thread Tom de Vries

On 22-04-14 21:27, Eric Botcazou wrote:

Ping of this ( http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00888.html )
patch.




Eric,

thanks for the review.


That patch isn't for GCC mainline though, but


I don't understand why you say that.


OK on principle if you test it
on mainline,


I have.


avoid the very ugly set-inside-use idiom and do:

   record_hard_reg_sets (XEXP (op, 0), NULL, pset);

instead of reimplementing it manually.



Updated as attached, I'll retest and commit.

Thanks,
- Tom

2014-01-15  Tom de Vries  t...@codesourcery.com

	* rtlanal.c (find_all_hard_reg_sets): Note CALL_INSN_FUNCTION_USAGE
	clobbers.

diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 284c475..f3471b1 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1052,8 +1052,14 @@ find_all_hard_reg_sets (const_rtx insn, HARD_REG_SET *pset, bool implicit)
 
   CLEAR_HARD_REG_SET (*pset);
   note_stores (PATTERN (insn), record_hard_reg_sets, pset);
-  if (implicit && CALL_P (insn))
-IOR_HARD_REG_SET (*pset, call_used_reg_set);
+  if (CALL_P (insn))
+{
+  if (implicit)
+	IOR_HARD_REG_SET (*pset, call_used_reg_set);
+
+  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+	record_hard_reg_sets (XEXP (link, 0), NULL, pset);
+}
   for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
 if (REG_NOTE_KIND (link) == REG_INC)
   record_hard_reg_sets (XEXP (link, 0), NULL, pset);


-fuse-caller-save - Collect register usage information

2014-04-23 Thread Tom de Vries

On 22-04-14 17:05, Tom de Vries wrote:

I've updated the fuse-caller-save patch series to model non-callee call clobbers
in CALL_INSN_FUNCTION_USAGE.


Vladimir,

This is the updated version of the previously approved patch 
http://gcc.gnu.org/ml/gcc-patches/2013-03/msg01320.html , updated for the new 
hook call_fusage_contains_non_callee_clobbers.


The only difference is in the functions get_call_reg_set_usage and 
collect_fn_hard_reg_usage which use the hook.


OK for trunk?

Thanks,
- Tom

2013-04-29  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

* cgraph.h (struct cgraph_node): Add function_used_regs,
function_used_regs_initialized and function_used_regs_valid fields.
* final.c: Move include of hard-reg-set.h to before rtl.h to declare
find_all_hard_reg_sets.
(collect_fn_hard_reg_usage, get_call_fndecl, get_call_cgraph_node)
(get_call_reg_set_usage): New function.
(rest_of_handle_final): Use collect_fn_hard_reg_usage.

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 15310d8..eb0fe8e 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -408,6 +408,15 @@ public:
   /* Time profiler: first run of function.  */
   int tp_first_run;
 
+  /* Call unsaved hard registers really used by the corresponding
+ function (including ones used by functions called by the
+ function).  */
+  HARD_REG_SET function_used_regs;
+  /* Set if function_used_regs is initialized.  */
+  unsigned function_used_regs_initialized: 1;
+  /* Set if function_used_regs is valid.  */
+  unsigned function_used_regs_valid: 1;
+
   /* Set when decl is an abstract function pointed to by the
  ABSTRACT_DECL_ORIGIN of a reachable function.  */
   unsigned used_as_abstract_origin : 1;
diff --git a/gcc/final.c b/gcc/final.c
index 83abee2..0b1947d 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "tree.h"
 #include "varasm.h"
+#include "hard-reg-set.h"
 #include "rtl.h"
 #include "tm_p.h"
 #include "regs.h"
@@ -57,7 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "recog.h"
 #include "conditions.h"
 #include "flags.h"
-#include "hard-reg-set.h"
 #include "output.h"
 #include "except.h"
 #include "function.h"
@@ -223,6 +223,7 @@ static int alter_cond (rtx);
 static int final_addr_vec_align (rtx);
 #endif
 static int align_fuzz (rtx, rtx, int, unsigned);
+static void collect_fn_hard_reg_usage (void);
 
 /* Initialize data in final at the beginning of a compilation.  */
 
@@ -4425,6 +4426,7 @@ rest_of_handle_final (void)
   assemble_start_function (current_function_decl, fnname);
   final_start_function (get_insns (), asm_out_file, optimize);
   final (get_insns (), asm_out_file, optimize);
+  collect_fn_hard_reg_usage ();
   final_end_function ();
 
   /* The IA-64 .handlerdata directive must be issued before the .endp
@@ -4720,3 +4722,119 @@ make_pass_clean_state (gcc::context *ctxt)
 {
   return new pass_clean_state (ctxt);
 }
+
+/* Collect hard register usage for the current function.  */
+
+static void
+collect_fn_hard_reg_usage (void)
+{
+  rtx insn;
+  int i;
+  struct cgraph_node *node;
+
+  if (!flag_use_caller_save)
+return;
+
+  node = cgraph_get_node (current_function_decl);
+  gcc_assert (node != NULL);
+
+  gcc_assert (!node->function_used_regs_initialized);
+  node->function_used_regs_initialized = 1;
+
+  for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
+{
+  HARD_REG_SET insn_used_regs;
+
+  if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+  find_all_hard_reg_sets (insn, &insn_used_regs, false);
+
+  if (CALL_P (insn)
+	  && (!targetm.call_fusage_contains_non_callee_clobbers ()
+	  || !get_call_reg_set_usage (insn, &insn_used_regs, call_used_reg_set)))
+	{
+	  CLEAR_HARD_REG_SET (node->function_used_regs);
+	  return;
+	}
+
+  IOR_HARD_REG_SET (node->function_used_regs, insn_used_regs);
+}
+
+  /* Be conservative - mark fixed and global registers as used.  */
+  IOR_HARD_REG_SET (node->function_used_regs, fixed_reg_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (global_regs[i])
+  SET_HARD_REG_BIT (node->function_used_regs, i);
+
+#ifdef STACK_REGS
+  /* Handle STACK_REGS conservatively, since the df-framework does not
+ provide accurate information for them.  */
+
+  for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+SET_HARD_REG_BIT (node->function_used_regs, i);
+#endif
+
+  node->function_used_regs_valid = 1;
+}
+
+/* Get the declaration of the function called by INSN.  */
+
+static tree
+get_call_fndecl (rtx insn)
+{
+  rtx note, datum;
+
+  if (!flag_use_caller_save)
+return NULL_TREE;
+
+  note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX);
+  if (note == NULL_RTX)
+return NULL_TREE;
+
+  datum = XEXP (note, 0);
+  if (datum != NULL_RTX)
+return SYMBOL_REF_DECL (datum);
+
+  return NULL_TREE;
+}
+
+static struct cgraph_node *
+get_call_cgraph_node (rtx insn)
+{
+  tree fndecl

Add clobber_reg

2014-04-23 Thread Tom de Vries

On 22-04-14 17:05, Tom de Vries wrote:

I've updated the fuse-caller-save patch series to model non-callee call clobbers
in CALL_INSN_FUNCTION_USAGE.


Eric,

Richard Sandiford mentioned here ( 
http://gcc.gnu.org/ml/gcc-patches/2014-04/msg00870.html ):

...
Although we really should have a utility function like use_reg, but for
clobbers, so that the above would become:

  clobber_reg (CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (word_mode, 18));
...


I've implemented a patch that adds clobber_reg and clobber_reg_mode, similar to 
use_reg and use_reg_mode.


Bootstrapped and reg-tested on x86_64 as part of the fuse-caller-save series.

OK for trunk?

Thanks,
- Tom

2014-04-18  Tom de Vries  t...@codesourcery.com

* expr.c (clobber_reg_mode): New function.
* expr.h (clobber_reg): New function.

diff --git a/gcc/expr.c b/gcc/expr.c
index 72e4401..fc58eb7f 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -2396,6 +2396,18 @@ use_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode)
 = gen_rtx_EXPR_LIST (mode, gen_rtx_USE (VOIDmode, reg), *call_fusage);
 }
 
+/* Add a CLOBBER expression for REG to the (possibly empty) list pointed
+   to by CALL_FUSAGE.  REG must denote a hard register.  */
+
+void
+clobber_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode)
+{
+  gcc_assert (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER);
+
+  *call_fusage
+    = gen_rtx_EXPR_LIST (mode, gen_rtx_CLOBBER (VOIDmode, reg), *call_fusage);
+}
+
 /* Add USE expressions to *CALL_FUSAGE for each of NREGS consecutive regs,
starting at REGNO.  All of these registers must be hard registers.  */
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 524da67..1823feb 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -346,6 +346,7 @@ extern void copy_blkmode_from_reg (rtx, rtx, tree);
 /* Mark REG as holding a parameter for the next CALL_INSN.
Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode.  */
 extern void use_reg_mode (rtx *, rtx, enum machine_mode);
+extern void clobber_reg_mode (rtx *, rtx, enum machine_mode);
 
 extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
 
@@ -356,6 +357,13 @@ use_reg (rtx *fusage, rtx reg)
   use_reg_mode (fusage, reg, VOIDmode);
 }
 
+/* Mark REG as clobbered by the call with FUSAGE as CALL_INSN_FUNCTION_USAGE.  */
+static inline void
+clobber_reg (rtx *fusage, rtx reg)
+{
+  clobber_reg_mode (fusage, reg, VOIDmode);
+}
+
 /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters
for the next CALL_INSN.  */
 extern void use_regs (rtx *, int, int);


Add post_expand_call_insn hook

2014-04-23 Thread Tom de Vries

On 22-04-14 17:05, Tom de Vries wrote:

I've updated the fuse-caller-save patch series to model non-callee call clobbers
in CALL_INSN_FUNCTION_USAGE.


Eric,

this patch adds a post_expand_call_insn hook.

The hook is called right after expansion of calls, and allows a target to do 
additional processing, such as f.i. adding clobbers to CALL_INSN_FUNCTION_USAGE.


Instead of using the hook, we could add code to the preparation statements 
operand of the different call expands, but that requires those expands not to 
use the rtl template, and generate all the rtl through c code. Which requires a 
rewrite of the call expands in case of Aarch64.


Bootstrapped and reg-tested on x86_64 as part of the fuse-caller-save patch 
series.

OK for trunk?

Thanks,
- Tom

2014-04-18  Tom de Vries  t...@codesourcery.com

* target.def (post_expand_call_insn): New DEFHOOK.
* calls.c (expand_call, emit_library_call_value_1): Call
post_expand_call_insn hook.
* tm.texi.in (@section Storage Layout): Add hook
TARGET_POST_EXPAND_CALL_INSN.
* hooks.c (hook_void_rtx): New function.
* hooks.h (hook_void_rtx): Declare function.
diff --git a/gcc/calls.c b/gcc/calls.c
index e798c7a..0777a02 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3507,6 +3507,8 @@ expand_call (tree exp, rtx target, int ignore)
 
   free (stack_usage_map_buf);
 
+  targetm.post_expand_call_insn (last_call_insn ());
+
   return target;
 }
 
@@ -4344,6 +4346,8 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
 
   free (stack_usage_map_buf);
 
+  targetm.post_expand_call_insn (last_call_insn ());
+
   return value;
 
 }
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8af8efd..40b5bb1 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -1408,6 +1408,11 @@ registers whenever the function being expanded has any SDmode
 usage.
 @end deftypefn
 
+@deftypefn {Target Hook} void TARGET_POST_EXPAND_CALL_INSN (rtx)
+This hook is called just after expansion of a call_expr into rtl, allowing
+the target to perform additional processing.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_INSTANTIATE_DECLS (void)
 This hook allows the backend to perform additional instantiations on rtl
 that are not actually in any insns yet, but will be later.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 8991c3c..812b0b8 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -1285,6 +1285,8 @@ The default definition of this macro returns false for all sizes.
 
 @hook TARGET_EXPAND_TO_RTL_HOOK
 
+@hook TARGET_POST_EXPAND_CALL_INSN
+
 @hook TARGET_INSTANTIATE_DECLS
 
 @hook TARGET_MANGLE_TYPE
diff --git a/gcc/hooks.c b/gcc/hooks.c
index 1c67bdf..53e8591 100644
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -461,6 +461,13 @@ hook_void_rtx_int (rtx insn ATTRIBUTE_UNUSED, int mode ATTRIBUTE_UNUSED)
 {
 }
 
+/* Generic hook that takes a rtx and returns void.  */
+
+void
+hook_void_rtx (rtx insn ATTRIBUTE_UNUSED)
+{
+}
+
 /* Generic hook that takes a struct gcc_options * and returns void.  */
 
 void
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 896b41d..4df5ae0 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -66,6 +66,7 @@ extern bool hook_bool_dint_dint_uint_bool_true (double_int, double_int,
 
 extern void hook_void_void (void);
 extern void hook_void_constcharptr (const char *);
+extern void hook_void_rtx (rtx);
 extern void hook_void_rtx_int (rtx, int);
 extern void hook_void_FILEptr_constcharptr (FILE *, const char *);
 extern bool hook_bool_FILEptr_rtx_false (FILE *, rtx);
diff --git a/gcc/target.def b/gcc/target.def
index ae0bc9c..2f7178c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4639,6 +4639,15 @@ usage.,
  hook_void_void)
 
 /* This target hook allows the backend to perform additional
+   processing after expansion of a call insn.  */
+DEFHOOK
+(post_expand_call_insn,
+ "This hook is called just after expansion of a call_expr into rtl, allowing\n\
+the target to perform additional processing.",
+ void, (rtx),
+ hook_void_rtx)
+
+/* This target hook allows the backend to perform additional
instantiations on rtx that are not actually in insns yet,
but will be later.  */
 DEFHOOK


Fix DEFHOOKPOD argument order in target-hooks-macros.h comment

2014-04-23 Thread Tom de Vries

Joern,

target-hooks-macros.h shows an argument order of DOC, TYPE, NAME, INIT for 
DEFHOOKPOD in a comment:

...
   DEFHOOKPOD(DOC, TYPE, NAME, INIT): Define a piece-of-data 'hook'.
...

But the first DEFHOOKPOD that I see in target.def:
...
DEFHOOKPOD
(atomic_test_and_set_trueval,
 "This value should be set if the result written by\
 @code{atomic_test_and_set} is not exactly 1, i.e. the\
 @code{bool} @code{true}.",
 unsigned char, 1)
...
seems to have the order NAME, DOC, TYPE, INIT.

target.def is the only file to include target-hooks-macros.h, but other defines 
of DEFHOOKPOD in target.h:

...
#define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME;
...
and genhooks.c:
...
#define DEFHOOKPOD(NAME, DOC, TYPE, INIT) \
...
use the same order.

I'd say that confirms that the argument order for DEFHOOKPOD in the comment in 
target-hooks-macros.h is wrong.


This patch fixes that. I'll commit shortly, as obvious.

Thanks,
- Tom

2014-04-23  Tom de Vries  t...@codesourcery.com

* target-hooks-macros.h: Fix DEFHOOKPOD argument order in comment.

diff --git a/gcc/target-hooks-macros.h b/gcc/target-hooks-macros.h
index 5cf4cb1..901f824 100644
--- a/gcc/target-hooks-macros.h
+++ b/gcc/target-hooks-macros.h
@@ -18,7 +18,7 @@
 /* The following macros should be provided by the including file:
 
DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT): Define a function-valued hook.
-   DEFHOOKPOD(DOC, TYPE, NAME, INIT): Define a piece-of-data 'hook'.  */
+   DEFHOOKPOD(NAME, DOC, TYPE, INIT): Define a piece-of-data 'hook'.  */
 
 /* Defaults for optional macros:
DEFHOOKPODX(NAME, TYPE, INIT): Like DEFHOOKPOD, but share documentation


Re: Add call_fusage_contains_non_callee_clobbers hook

2014-04-24 Thread Tom de Vries

On 23-04-14 17:10, Richard Sandiford wrote:

FWIW I think this should be a plain bool rather than a function,
like delay_sched2 etc.



Vladimir,

I've reimplemented the hook using DEFHOOKPOD instead of DEFHOOK, to make it a 
plain bool.


OK for trunk?

Thanks,
- Tom

2013-04-29  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

* target.def (call_fusage_contains_non_callee_clobbers): New DEFHOOKPOD.
* doc/tm.texi.in (@node Stack and Calling): Add Miscellaneous Register
Hooks to @menu.
(@node Miscellaneous Register Hooks): New node.
(@hook TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): New hook.
* doc/tm.texi: Regenerate.


diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index b8ca17e..f06113d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -3091,6 +3091,7 @@ This describes the stack layout and calling conventions.
 * Profiling::
 * Tail Calls::
 * Stack Smashing Protection::
+* Miscellaneous Register Hooks::
 @end menu
 
 @node Frame Layout
@@ -5016,6 +5017,21 @@ normally defined in @file{libgcc2.c}.
 Whether this target supports splitting the stack when the options described in @var{opts} have been passed.  This is called after options have been parsed, so the target may reject splitting the stack in some configurations.  The default version of this hook returns false.  If @var{report} is true, this function may issue a warning or error; if @var{report} is false, it must simply return a value
 @end deftypefn
 
+@node Miscellaneous Register Hooks
+@subsection Miscellaneous register hooks
+@cindex miscellaneous register hooks
+
+@deftypevr {Target Hook} bool TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+set to true if all the calls in the current function contain clobbers in
+CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call
+rather than by the callee, and are not already set or clobbered in the call
+pattern.  Examples of such registers are registers used in PLTs and stubs,
+and temporary registers used in the call instruction but not present in the
+rtl pattern.  Another way to formulate it is the registers not present in the
+rtl pattern that are clobbered by the call assuming the callee does not
+clobber any register.  The default version of this hook is set to false.
+@end deftypevr
+
 @node Varargs
 @section Implementing the Varargs Macros
 @cindex varargs implementation
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d793d26..8991c3c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2720,6 +2720,7 @@ This describes the stack layout and calling conventions.
 * Profiling::
 * Tail Calls::
 * Stack Smashing Protection::
+* Miscellaneous Register Hooks::
 @end menu
 
 @node Frame Layout
@@ -3985,6 +3986,12 @@ the function prologue.  Normally, the profiling code comes after.
 
 @hook TARGET_SUPPORTS_SPLIT_STACK
 
+@node Miscellaneous Register Hooks
+@subsection Miscellaneous register hooks
+@cindex miscellaneous register hooks
+
+@hook TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+
 @node Varargs
 @section Implementing the Varargs Macros
 @cindex varargs implementation
diff --git a/gcc/target.def b/gcc/target.def
index 3a64cd1..5787e13 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5130,6 +5130,22 @@ FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, and the PIC_OFFSET_TABLE_REGNUM.,
  void, (bitmap regs),
  hook_void_bitmap)
 
+/* Targets should define this target hook to mark that non-callee clobbers are
+   present in CALL_INSN_FUNCTION_USAGE for all the calls in the current
+   function.  */
+DEFHOOKPOD
+(call_fusage_contains_non_callee_clobbers,
+ "set to true if all the calls in the current function contain clobbers in\n\
+CALL_INSN_FUNCTION_USAGE for the registers that are clobbered by the call\n\
+rather than by the callee, and are not already set or clobbered in the call\n\
+pattern.  Examples of such registers are registers used in PLTs and stubs,\n\
+and temporary registers used in the call instruction but not present in the\n\
+rtl pattern.  Another way to formulate it is the registers not present in the\n\
+rtl pattern that are clobbered by the call assuming the callee does not\n\
+clobber any register.  The default version of this hook is set to false.",
+ bool, 
+ false)
+
 /* Fill in additional registers set up by prologue into a regset.  */
 DEFHOOK
 (set_up_by_prologue,


-fuse-caller-save - Enable for MIPS

2014-04-25 Thread Tom de Vries

On 22-04-14 17:05, Tom de Vries wrote:

I've updated the fuse-caller-save patch series to model non-callee call clobbers
in CALL_INSN_FUNCTION_USAGE.



Richard,

this patch enables the fuse-caller-save optimization for MIPS.

It adds the $6 clobber in CALL_INSN_FUNCTION_USAGE when required, and sets 
TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.


I've done a minimal rebuild for mips, ran the fuse-caller-save testcase and 
checked with -dP that I can find $6 in the C_I_F_U.


ok for trunk if a full MIPS build and test cycle succeeds?

Thanks,
- Tom

2014-01-12  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

* config/mips/mips.h (POST_CALL_TMP_REG): Define.
* config/mips/mips.c (mips_split_call): Use POST_CALL_TMP_REG.
(TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine to true.
(mips_expand_call): Add POST_CALL_TMP_REG clobber.
* config/mips/mips.md (define_expand untyped_call): Add
POST_CALL_TMP_REG clobber.

* gcc.target/mips/mips.exp: Add use-caller-save to -ffoo/-fno-foo
options.
* gcc.target/mips/fuse-caller-save.c: New test.
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 45256e9..b61cd44 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -7027,11 +7027,17 @@ mips_expand_call (enum mips_call_type type, rtx result, rtx addr,
 {
   rtx orig_addr, pattern, insn;
   int fp_code;
+  rtx post_call_tmp_reg = gen_rtx_REG (word_mode, POST_CALL_TMP_REG);
 
   fp_code = aux == 0 ? 0 : (int) GET_MODE (aux);
   insn = mips16_build_call_stub (result, addr, args_size, fp_code);
   if (insn)
 {
+  if (TARGET_EXPLICIT_RELOCS
+	  && TARGET_CALL_CLOBBERED_GP
+	  && !find_reg_note (insn, REG_NORETURN, 0))
+	clobber_reg (&CALL_INSN_FUNCTION_USAGE (insn), post_call_tmp_reg);
+
   gcc_assert (!lazy_p && type == MIPS_CALL_NORMAL);
   return insn;
 }
@@ -7087,7 +7093,13 @@ mips_expand_call (enum mips_call_type type, rtx result, rtx addr,
   pattern = fn (result, addr, args_size);
 }
 
-  return mips_emit_call_insn (pattern, orig_addr, addr, lazy_p);
+  insn = mips_emit_call_insn (pattern, orig_addr, addr, lazy_p);
+  if (TARGET_EXPLICIT_RELOCS
+      && TARGET_CALL_CLOBBERED_GP
+      && !find_reg_note (insn, REG_NORETURN, 0))
+clobber_reg (&CALL_INSN_FUNCTION_USAGE (insn), post_call_tmp_reg);
+
+  return insn;
 }
 
 /* Split call instruction INSN into a $gp-clobbering call and
@@ -7099,10 +7111,8 @@ mips_split_call (rtx insn, rtx call_pattern)
 {
   emit_call_insn (call_pattern);
   if (!find_reg_note (insn, REG_NORETURN, 0))
-/* Pick a temporary register that is suitable for both MIPS16 and
-   non-MIPS16 code.  $4 and $5 are used for returning complex double
-   values in soft-float code, so $6 is the first suitable candidate.  */
-mips_restore_gp_from_cprestore_slot (gen_rtx_REG (Pmode, GP_ARG_FIRST + 2));
+mips_restore_gp_from_cprestore_slot (gen_rtx_REG (Pmode,
+		  POST_CALL_TMP_REG));
 }
 
 /* Return true if a call to DECL may need to use JALX.  */
@@ -19134,6 +19144,9 @@ mips_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV mips_atomic_assign_expand_fenv
 
+#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index b25865b..8c5498a 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2212,6 +2212,11 @@ enum reg_class
 #define FP_ARG_FIRST (FP_REG_FIRST + 12)
 #define FP_ARG_LAST  (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
 
+/* Temporary register that is used after a call, and suitable for both
+   MIPS16 and non-MIPS16 code.  $4 and $5 are used for returning complex double
+   values in soft-float code, so $6 is the first suitable candidate.  */
+#define POST_CALL_TMP_REG (GP_ARG_FIRST + 2)
+
 /* 1 if N is a possible register number for function argument passing.
We have no FP argument registers when soft-float.  When FP registers
are 32 bits, we can't directly reference the odd numbered ones.  */
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index f914ab6..e333c42 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -6787,8 +6787,15 @@
   
 {
   int i;
+  rtx insn;
+  rtx post_call_tmp_reg = gen_rtx_REG (word_mode, POST_CALL_TMP_REG);
 
-  emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+  insn = emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+  if (TARGET_EXPLICIT_RELOCS
+      && TARGET_CALL_CLOBBERED_GP
+      && !find_reg_note (insn, REG_NORETURN, 0))
+clobber_reg (&CALL_INSN_FUNCTION_USAGE (insn), post_call_tmp_reg);
 
   for (i = 0; i < XVECLEN (operands[2], 0); i++)
 {
diff --git a/gcc

Re: [COMMITTED] Fix debug/60438 -- i686 stack vs fp operations

2014-04-26 Thread Tom de Vries

On 13-03-14 21:49, Richard Henderson wrote:

  (define_expand ldexpxf3
-  [(set (match_dup 3)
-   (float:XF (match_operand:SI 2 register_operand)))
-   (parallel [(set (match_operand:XF 0  register_operand)
-  (unspec:XF [(match_operand:XF 1 register_operand)
-  (match_dup 3)]
- UNSPEC_FSCALE_FRACT))
- (set (match_dup 4)
-  (unspec:XF [(match_dup 1) (match_dup 3)]
- UNSPEC_FSCALE_EXP))])]
+  [(match_operand:XF 0 register_operand)
+   (match_operand:XF 1 register_operand)
+   (match_operand:SI 2 register_operand)]
TARGET_USE_FANCY_MATH_387
  flag_unsafe_math_optimizations
  {
@@ -14808,6 +14633,11 @@

operands[3] = gen_reg_rtx (XFmode);
operands[4] = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_floatsixf2 (operands[3], operands[2]));
+  emit_insn (gen_fscalexf4_i387 (operands[0], operands[4],
+ operands[1], operands[3]));
+  DONE;
  })


Richard,

For a non-bootstrap x86_64 build, gcc.dg/builtins-34.c fails for me with a 
sigsegv.

I've traced it back to this code in insn-emit.c:
...
rtx
gen_ldexpxf3 (rtx operand0,
rtx operand1,
rtx operand2)
{
  rtx _val = 0;
  start_sequence ();
  {
rtx operands[3];
operands[0] = operand0;
operands[1] = operand1;
operands[2] = operand2;

{
  if (optimize_insn_for_size_p ())
FAIL;

  operands[3] = gen_reg_rtx (XFmode);
  operands[4] = gen_reg_rtx (XFmode);
...

operands is declared with size 3, and operands[3,4] accesses are out of bounds.

I've done a minimal build with attached patch, and reran the test-case, which 
passes now.


OK if bootstrap succeeds?

Thanks,
- Tom
2014-04-26  Tom de Vries  t...@codesourcery.com

	* config/i386/i386.md (define_expand ldexpxf3): Fix out-of-bounds
	array accesses.
---
 gcc/config/i386/i386.md | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 25e2e93..9f103cf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14427,15 +14427,16 @@
   TARGET_USE_FANCY_MATH_387
 flag_unsafe_math_optimizations
 {
+  rtx tmp1, tmp2;
   if (optimize_insn_for_size_p ())
 FAIL;
 
-  operands[3] = gen_reg_rtx (XFmode);
-  operands[4] = gen_reg_rtx (XFmode);
+  tmp1 = gen_reg_rtx (XFmode);
+  tmp2 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_floatsixf2 (operands[3], operands[2]));
-  emit_insn (gen_fscalexf4_i387 (operands[0], operands[4],
- operands[1], operands[3]));
+  emit_insn (gen_floatsixf2 (tmp1, operands[2]));
+  emit_insn (gen_fscalexf4_i387 (operands[0], tmp2,
+ operands[1], tmp1));
   DONE;
 })
 
-- 
1.8.3.2



Re: -fuse-caller-save - Enable for MIPS

2014-04-26 Thread Tom de Vries

On 25-04-14 15:22, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 45256e9..b61cd44 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -7027,11 +7027,17 @@ mips_expand_call (enum mips_call_type type, rtx result, 
rtx addr,
  {
rtx orig_addr, pattern, insn;
int fp_code;
+  rtx post_call_tmp_reg = gen_rtx_REG (word_mode, POST_CALL_TMP_REG);

fp_code = aux == 0 ? 0 : (int) GET_MODE (aux);
insn = mips16_build_call_stub (result, addr, args_size, fp_code);
if (insn)
  {
+  if (TARGET_EXPLICIT_RELOCS
+      && TARGET_CALL_CLOBBERED_GP
+      && !find_reg_note (insn, REG_NORETURN, 0))
+   clobber_reg (&CALL_INSN_FUNCTION_USAGE (insn), post_call_tmp_reg);
+


I think this condition should go in mips_emit_call_insn instead,
so that we don't have 4 instances of it.  untyped_call could then
use mips_expand_call as well.



Richard,

Done.


Until now there was no real downside to using $6 for non-MIPS16 code.
Now that there is, it would probably be worth making it:

+#define POST_CALL_TMP_REG \
   (TARGET_MIPS16 ? GP_ARG_FIRST + 2 : PIC_OFFSET_TABLE_REGNUM)

and only adding the clobber for MIPS16.



Done.


diff --git a/gcc/testsuite/gcc.target/mips/fuse-caller-save.c 
b/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
new file mode 100644
index 000..1fd6c7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/fuse-caller-save.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options -fuse-caller-save } */
+/* { dg-skip-if  { *-*-* }  { * } { -Os } } */


I might have asked this before, sorry, but why this skip?  Please add a brief
comment (in the string, if short enough).


I've reduced the amount of skips a bit, and added a comment why they are 
skipped.




+/* Testing -fuse-caller-save optimization option.  */
+
+static int __attribute__((noinline)) NOCOMPRESSION
+bar (int x)
+{
+  return x + 3;
+}
+
+int __attribute__((noinline)) NOCOMPRESSION
+foo (int y)
+{
+  return y + bar (y);
+}
+
+int NOCOMPRESSION
+main (void)
+{
+  return !(foo (5) == 13);
+}
+
+/* Check that there are only 2 stack-saves: r31 in main and foo.  */
+
+/* Check that there are only 2 sw/sd.  */
+/* { dg-final { scan-assembler-times (?n)s\[wd\]\t\\\$.*,.*\\(\\\$sp\\) 2 } 
} */
+
+/* Check that the first caller-save register is unused.  */
+/* { dg-final { scan-assembler-not \\\$16 } } */


It'd be good to avoid NOCOMPRESSION because the only case that really
needs the temporary register is MIPS16.  Please try putting the test
in a header file and reusing it for three tests, one each of MIPS16,
microMIPS and uncompressed.



Done, I think, I'm not 100% sure I understood what you wanted me to do here.

Built and reg-tested on MIPS.

OK for trunk?

Thanks,
- Tom

2014-01-12  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

	* config/mips/mips-protos.h (mips_emit_call_insn): Declare.
	* config/mips/mips.h (POST_CALL_TMP_REG): Define.
	* config/mips/mips.c (mips_emit_call_insn): Remove static.  Use
	last_call_insn.  Add POST_CALL_TMP_REG clobber
	 (mips_split_call): Use POST_CALL_TMP_REG.
	(TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine to true.
	* config/mips/mips.md (define_expand untyped_call): Use
	mips_emit_call_insn.

	* gcc.target/mips/mips.exp: Add use-caller-save to -ffoo/-fno-foo
	options.
	* gcc.target/mips/fuse-caller-save.h: New include file.
	* gcc.target/mips/fuse-caller-save.c: New test.
	* gcc.target/mips/fuse-caller-save-mips16.c: Same.
	* gcc.target/mips/fuse-caller-save-micromips.c: Same.
---
 gcc/config/mips/mips-protos.h  |  1 +
 gcc/config/mips/mips.c | 24 --
 gcc/config/mips/mips.h |  7 +++
 gcc/config/mips/mips.md|  4 +++-
 .../gcc.target/mips/fuse-caller-save-micromips.c   | 17 +++
 .../gcc.target/mips/fuse-caller-save-mip16.c   | 17 +++
 gcc/testsuite/gcc.target/mips/fuse-caller-save.c   | 17 +++
 gcc/testsuite/gcc.target/mips/fuse-caller-save.h   | 17 +++
 gcc/testsuite/gcc.target/mips/mips.exp |  1 +
 9 files changed, 98 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save-mip16.c
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save.c
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save.h

diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 3d59b7b..19ea919 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -232,6 +232,7 @@ extern bool mips_use_pic_fn_addr_reg_p (const_rtx);
 extern rtx mips_expand_call (enum mips_call_type, rtx, rtx, rtx, rtx, bool);
 extern void mips_split_call (rtx, rtx);
 extern bool mips_get_pic_call_symbol (rtx

Re: -fuse-caller-save - Collect register usage information

2014-04-26 Thread Tom de Vries

Eric,
Honza,

This patch adds analysis in pass_final to track which hard registers are set or
clobbered by the function body, and stores that information in a
struct cgraph_node, to be used in the fuse-caller-save optimization.

This is the updated version of the previously approved patch
submitted here (http://gcc.gnu.org/ml/gcc-patches/2013-03/msg01320.html ).
The changes are:
- using a new hook call_fusage_contains_non_callee_clobbers,
- incorporating minor review comments from Richard Sandiford
  ( http://gcc.gnu.org/ml/gcc-patches/2014-04/msg01436.html ).

As part of the fuse-caller-save patch series, bootstrapped and reg-tested on 
x86_64, and built and reg-tested on MIPS.


Eric, non-cgraph part OK for trunk?

Honza, cgraph part OK for trunk?

Thanks,
- Tom
2013-04-29  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

	* cgraph.h (struct cgraph_node): Add function_used_regs,
	function_used_regs_initialized and function_used_regs_valid fields.
	* final.c: Move include of hard-reg-set.h to before rtl.h to declare
	find_all_hard_reg_sets.
	(collect_fn_hard_reg_usage, get_call_fndecl, get_call_cgraph_node)
	(get_call_reg_set_usage): New function.
	(rest_of_handle_final): Use collect_fn_hard_reg_usage.
---
 gcc/cgraph.h |   7 
 gcc/final.c  | 117 ++-
 gcc/regs.h   |   4 ++
 3 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 84fc1d9..d1f 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -408,6 +408,13 @@ public:
   /* Time profiler: first run of function.  */
   int tp_first_run;
 
+  /* Call unsaved hard registers really used by the corresponding
+ function (including ones used by functions called by the
+ function).  */
+  HARD_REG_SET function_used_regs;
+  /* Set if function_used_regs is valid.  */
+  unsigned function_used_regs_valid: 1;
+
   /* Set when decl is an abstract function pointed to by the
  ABSTRACT_DECL_ORIGIN of a reachable function.  */
   unsigned used_as_abstract_origin : 1;
diff --git a/gcc/final.c b/gcc/final.c
index 8c6f6ee..7b79059 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "tree.h"
 #include "varasm.h"
+#include "hard-reg-set.h"
 #include "rtl.h"
 #include "tm_p.h"
 #include "regs.h"
@@ -57,7 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "recog.h"
 #include "conditions.h"
 #include "flags.h"
-#include "hard-reg-set.h"
 #include "output.h"
 #include "except.h"
 #include "function.h"
@@ -223,6 +223,7 @@ static int alter_cond (rtx);
 static int final_addr_vec_align (rtx);
 #endif
 static int align_fuzz (rtx, rtx, int, unsigned);
+static void collect_fn_hard_reg_usage (void);
 
 /* Initialize data in final at the beginning of a compilation.  */
 
@@ -4426,6 +4427,7 @@ rest_of_handle_final (void)
   assemble_start_function (current_function_decl, fnname);
   final_start_function (get_insns (), asm_out_file, optimize);
   final (get_insns (), asm_out_file, optimize);
+  collect_fn_hard_reg_usage ();
   final_end_function ();
 
   /* The IA-64 .handlerdata directive must be issued before the .endp
@@ -4724,3 +4726,116 @@ make_pass_clean_state (gcc::context *ctxt)
 {
   return new pass_clean_state (ctxt);
 }
+
+/* Collect hard register usage for the current function.  */
+
+static void
+collect_fn_hard_reg_usage (void)
+{
+  rtx insn;
+  int i;
+  struct cgraph_node *node;
+
+  if (!flag_use_caller_save
+  || !targetm.call_fusage_contains_non_callee_clobbers)
+return;
+
+  node = cgraph_get_node (current_function_decl);
+  gcc_assert (node != NULL);
+
+  for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
+{
+  HARD_REG_SET insn_used_regs;
+
+  if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+  find_all_hard_reg_sets (insn, &insn_used_regs, false);
+
+  if (CALL_P (insn)
+	  && !get_call_reg_set_usage (insn, &insn_used_regs, call_used_reg_set))
+	{
+	  CLEAR_HARD_REG_SET (node->function_used_regs);
+	  return;
+	}
+
+  IOR_HARD_REG_SET (node->function_used_regs, insn_used_regs);
+}
+
+  /* Be conservative - mark fixed and global registers as used.  */
+  IOR_HARD_REG_SET (node->function_used_regs, fixed_reg_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+if (global_regs[i])
+  SET_HARD_REG_BIT (node->function_used_regs, i);
+
+#ifdef STACK_REGS
+  /* Handle STACK_REGS conservatively, since the df-framework does not
+ provide accurate information for them.  */
+
+  for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+SET_HARD_REG_BIT (node->function_used_regs, i);
+#endif
+
+  node->function_used_regs_valid = 1;
+}
+
+/* Get the declaration of the function called by INSN.  */
+
+static tree
+get_call_fndecl (rtx insn)
+{
+  rtx note, datum;
+
+  if (!flag_use_caller_save)
+return NULL_TREE;
+
+  note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX);
+  if (note == NULL_RTX)
+return

Re: [COMMITTED] Fix debug/60438 -- i686 stack vs fp operations

2014-04-26 Thread Tom de Vries

OK if bootstrap succeeds?


With testing of the bootstrap build of the patch, I ran into the following 
regression compared to a reference bootstrap build without the patch:

...
FAIL: g++.dg/tsan/cond_race.C  -O2  output pattern test, is ==3087==WARNING: 
Program is run with unlimited stack size, which wouldn't work with Threa\

dSanitizer.
==3087==Re-execing with stack size limited to 33554432 bytes.
==
WARNING: ThreadSanitizer: heap-use-after-free (pid=3087)
  Read of size 8 at 0x7d18efc8 by thread T1:
#0 pthread_cond_signal 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/libsanitizer/sanitizer_common/sanitizer_common_interceptors.i\

nc:2266 (libtsan.so.0+0x00039b21)
#1 thr(void*) 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/gcc/testsuite/g++.dg/tsan/cond_race.C:20 
(cond_race.exe+0x1033\

)
  Previous write of size 8 at 0x7d18efc8 by main thread:
#0 operator delete(void*) 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/libsanitizer/tsan/tsan_interceptors.cc:592 
(libtsan.so.0+0\

x000494b0)
#1 main 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/gcc/testsuite/g++.dg/tsan/cond_race.C:34 
(cond_race.exe+0x0ea0)

  Location is heap block of size 96 at 0x7d18efa0 allocated by main thread:
#0 operator new(unsigned long) 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/libsanitizer/tsan/tsan_interceptors.cc:560 
(libtsan.s\

o.0+0x000496f2)
#1 main 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/gcc/testsuite/g++.dg/tsan/cond_race.C:25 
(cond_race.exe+0x0e12)

  Thread T1 (tid=3101, running) created by main thread at:
#0 pthread_create 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/libsanitizer/tsan/tsan_interceptors.cc:877 
(libtsan.so.0+0x000\

47c23)
#1 main 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/gcc/testsuite/g++.dg/tsan/cond_race.C:29 
(cond_race.exe+0x0e5a)
SUMMARY: ThreadSanitizer: heap-use-after-free 
/home/vries/gcc_versions/data/test-fix-expand-ldexp/with/src/gcc/testsuite/g++.dg/tsan/cond_race.C:20 
t\

hr(void*)
==
ThreadSanitizer: reported 1 warnings
, should match ThreadSanitizer: data race.*pthread_cond_signal.*
...

I've found the same failure here: 
http://gcc.gnu.org/ml/gcc-testresults/2014-01/msg00127.html, so I'm assuming 
it's a spurious failure.


I've committed to trunk and 4.9.

Thanks,
- Tom


Re: -fuse-caller-save - Enable for MIPS

2014-04-27 Thread Tom de Vries

On 27-04-14 12:27, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

2014-01-12  Radovan Obradovic  robrado...@mips.com
 Tom de Vries  t...@codesourcery.com

* config/mips/mips-protos.h (mips_emit_call_insn): Declare.
* config/mips/mips.h (POST_CALL_TMP_REG): Define.
* config/mips/mips.c (mips_emit_call_insn): Remove static.  Use
last_call_insn.  Add POST_CALL_TMP_REG clobber
 (mips_split_call): Use POST_CALL_TMP_REG.
(TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine to true.
* config/mips/mips.md (define_expand untyped_call): Use
mips_emit_call_insn.

* gcc.target/mips/mips.exp: Add use-caller-save to -ffoo/-fno-foo
options.
* gcc.target/mips/fuse-caller-save.h: New include file.
* gcc.target/mips/fuse-caller-save.c: New test.
* gcc.target/mips/fuse-caller-save-mips16.c: Same.
* gcc.target/mips/fuse-caller-save-micromips.c: Same.


Sorry, a couple of things, but this is looking pretty good:


  mips_emit_call_insn (rtx pattern, rtx orig_addr, rtx addr, bool lazy_p)
  {
rtx insn, reg;

-  insn = emit_call_insn (pattern);
+  emit_call_insn (pattern);
+  insn = last_call_insn ();

if (TARGET_MIPS16 && mips_use_pic_fn_addr_reg_p (orig_addr))
  {


This change isn't necessary; emit_call_insn is defined to return a CALL_INSN.



I dropped this change, as well as the change in the untyped_call expand, I 
realized it's unnecessary.



@@ -2843,6 +2844,16 @@ mips_emit_call_insn (rtx pattern, rtx orig_addr, rtx 
addr, bool lazy_p)
   gen_rtx_REG (Pmode, GOT_VERSION_REGNUM));
emit_insn (gen_update_got_version ());
  }
+
+  if (TARGET_MIPS16
+   TARGET_EXPLICIT_RELOCS
+   TARGET_CALL_CLOBBERED_GP
+   !find_reg_note (insn, REG_NORETURN, 0))
+{
+  rtx post_call_tmp_reg = gen_rtx_REG (word_mode, POST_CALL_TMP_REG);
+  clobber_reg (CALL_INSN_FUNCTION_USAGE (insn), post_call_tmp_reg);
+}


The REG_NORETURN note won't be around yet, so we might as well drop
that line.  I'm not sure how useful it would be anyway since values
are never live across a noreturn call.



Done.


+/* Temporary register that is used after a call.  $4 and $5 are used for


Might as well make it "...used when restoring $gp after a call", now that
it's not as obvious from context.


+   returning complex double values in soft-float code, so $6 is the first
+   suitable candidate for !TARGET_MIPS16.  For TARGET_MIPS16, we use
+   PIC_OFFSET_TABLE_REGNUM instead.  */


!TARGET_MIPS16 and TARGET_MIPS16 are the wrong way around:

suitable candidate for TARGET_MIPS16.  For !TARGET_MIPS16 we can use
$gp itself as the temporary.  */



Fixed, thanks for catching that.


+/* The scan of the sp-relative saves will fail for -O0 and -O1.
+   For -flto, scans will fail because there's no code in the .s file.  */
+/* { dg-skip-if  { *-*-* }  { -O0 -O1 -flto} } */


The -flto thing is handled automatically by the testsuite
(see force_conventional_output_for) so that one should be left out.



Ah, I see. Removed.


I'm a bit surprised that it doesn't work at -O1 for a simple test
like this though.  What goes wrong?



AFAIU now the problem is that the optimization doesn't trigger for -O0 and -O1, 
because the register allocator behaves more conservatively.


OK for trunk, if re-testing succeeds?

Thanks,
- Tom

2014-01-12  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

	* config/mips/mips-protos.h (mips_emit_call_insn): Declare.
	* config/mips/mips.h (POST_CALL_TMP_REG): Define.
	* config/mips/mips.c (mips_emit_call_insn): Remove static.  Add
	POST_CALL_TMP_REG clobber.
	(mips_split_call): Use POST_CALL_TMP_REG.
	(TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine to true.

	* gcc.target/mips/mips.exp: Add use-caller-save to -ffoo/-fno-foo
	options.
	* gcc.target/mips/fuse-caller-save.h: New include file.
	* gcc.target/mips/fuse-caller-save.c: New test.
	* gcc.target/mips/fuse-caller-save-mips16.c: Same.
	* gcc.target/mips/fuse-caller-save-micromips.c: Same.
---
 gcc/config/mips/mips-protos.h|  1 +
 gcc/config/mips/mips.c   | 20 +++-
 gcc/config/mips/mips.h   |  7 +++
 .../gcc.target/mips/fuse-caller-save-micromips.c | 17 +
 .../gcc.target/mips/fuse-caller-save-mip16.c | 17 +
 gcc/testsuite/gcc.target/mips/fuse-caller-save.c | 17 +
 gcc/testsuite/gcc.target/mips/fuse-caller-save.h | 17 +
 gcc/testsuite/gcc.target/mips/mips.exp   |  1 +
 8 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save-micromips.c
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save-mip16.c
 create mode 100644 gcc/testsuite/gcc.target/mips/fuse-caller-save.c

Re: -fuse-caller-save - Enable for MIPS

2014-04-28 Thread Tom de Vries

On 28-04-14 12:26, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

On 27-04-14 12:27, Richard Sandiford wrote:

Tom de Vries tom_devr...@mentor.com writes:

   mips_emit_call_insn (rtx pattern, rtx orig_addr, rtx addr, bool lazy_p)
   {
 rtx insn, reg;

-  insn = emit_call_insn (pattern);
+  emit_call_insn (pattern);
+  insn = last_call_insn ();

 if (TARGET_MIPS16  mips_use_pic_fn_addr_reg_p (orig_addr))
   {


This change isn't necessary; emit_call_insn is defined to return a CALL_INSN.



I dropped this change, as well as the change in the untyped_call expand, I
realized it's unnecessary.


Why was the untyped_call part unnecessary?



The define_expand untyped_call uses GEN_CALL, which uses define_expand call, 
which uses mips_expand_call, which uses mips_emit_call_insn, which adds the 
required clobbers.



I'm a bit surprised that it doesn't work at -O1 for a simple test
like this though.  What goes wrong?



AFAIU now the problem is that the optimization doesn't trigger for -O0
and -O1, because the register allocator behaves more conservatively.


Hmm, is that just because -fcaller-saves is -O2 and above?
 If so,
should -fuse-caller-save imply -fcaller-saves?

Thanks,
Richard





Re: -fuse-caller-save - Enable for MIPS

2014-04-28 Thread Tom de Vries

On 28-04-14 12:47, Tom de Vries wrote:

Hmm, is that just because -fcaller-saves is -O2 and above?


For -O1, after adding -fcaller-saves the optimization triggers, and the 
test-cases pass.


For -O0, adding -fcaller-saves doesn't make a difference, the optimization 
doesn't trigger.



If so,
should -fuse-caller-save imply -fcaller-saves?


I don't think it's strictly necessary, but I can make a patch if required.

Thanks,
- Tom


Re: Add post_expand_call_insn hook

2014-04-29 Thread Tom de Vries

On 24-04-14 17:13, Eric Botcazou wrote:

The hook is called right after expansion of calls, and allows a target to do
additional processing, such as, for instance, adding clobbers to
CALL_INSN_FUNCTION_USAGE.

Instead of using the hook, we could add code to the preparation statements
operand of the different call expands, but that requires those expands not
to use the rtl template, and generate all the rtl through c code. Which
requires a rewrite of the call expands in case of Aarch64.


If Aarch64 is the only problematic back-end, then it should be changed to do
what the other back-ends already do to use use_reg.  IMO adding such a bogus
hook should be the very last resort solution.



Eric,

I've written this concept patch, which tries to address the same problem, but in 
a different (and I hope more generic) way.


It adds a post-emission C-code operand to define_expand.

As an example of how this could be useful, for the define_expand of call and 
call_value in the arm target, I'm using the new operand to do the post-emit call 
processing done currently in arm_emit_call_insn. This allows us to eliminate the 
call_internal and call_value_internal define_expands, and simplifies the call 
and call_value define_expands.


Any comments?

Thanks,
- Tom
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 74645ee..506791a 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -126,7 +126,7 @@ extern int arm_const_double_inline_cost (rtx);
 extern bool arm_const_double_by_parts (rtx);
 extern bool arm_const_double_by_immediates (rtx);
 extern const char *fp_immediate_constant (rtx);
-extern void arm_emit_call_insn (rtx, rtx);
+extern void arm_post_emit_call_insn (rtx, rtx);
 extern const char *output_call (rtx *);
 extern const char *output_call_mem (rtx *);
 void arm_emit_movpair (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 09b5c52..e36deac 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -17602,16 +17602,14 @@ vfp_emit_fstmd (int base_reg, int count)
   return count * 8;
 }
 
-/* Emit a call instruction with pattern PAT.  ADDR is the address of
-   the call target.  */
+/* Process a call instruction with pattern PAT after emission.  ADDR is the
+   address of the call target.  */
 
 void
-arm_emit_call_insn (rtx pat, rtx addr)
+arm_post_emit_call_insn (rtx pat, rtx addr)
 {
   rtx insn;
 
-  insn = emit_call_insn (pat);
-
   /* The PIC register is live on entry to VxWorks PIC PLT entries.
  If the call might use such an entry, add a use of the PIC register
  to the instruction's CALL_INSN_FUNCTION_USAGE.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 8a949b9..45019ae 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9082,7 +9082,7 @@
   TARGET_EITHER
   
   {
-rtx callee, pat;
+rtx callee;
 
 /* In an untyped call, we can get NULL for operand 2.  */
 if (operands[2] == NULL_RTX)
@@ -9097,18 +9097,13 @@
 	: !REG_P (callee))
   XEXP (operands[0], 0) = force_reg (Pmode, callee);
 
-pat = gen_call_internal (operands[0], operands[1], operands[2]);
-arm_emit_call_insn (pat, XEXP (operands[0], 0));
-DONE;
+  }
+  []
+  {
+arm_post_emit_call_insn (_val, XEXP (operands[0], 0));
   }
 )
 
-(define_expand call_internal
-  [(parallel [(call (match_operand 0 memory_operand )
-	(match_operand 1 general_operand ))
-	  (use (match_operand 2  ))
-	  (clobber (reg:SI LR_REGNUM))])])
-
 (define_insn *call_reg_armv5
   [(call (mem:SI (match_operand:SI 0 s_register_operand r))
  (match_operand 1  ))
@@ -9191,7 +9186,7 @@
   TARGET_EITHER
   
   {
-rtx pat, callee;
+rtx callee;
 
 /* In an untyped call, we can get NULL for operand 2.  */
 if (operands[3] == 0)
@@ -9205,21 +9200,13 @@
 	? arm_is_long_call_p (SYMBOL_REF_DECL (callee))
 	: !REG_P (callee))
   XEXP (operands[1], 0) = force_reg (Pmode, callee);
-
-pat = gen_call_value_internal (operands[0], operands[1],
-   operands[2], operands[3]);
-arm_emit_call_insn (pat, XEXP (operands[1], 0));
-DONE;
+  }
+  []
+  {
+arm_post_emit_call_insn (_val, XEXP (operands[1], 0));
   }
 )
 
-(define_expand call_value_internal
-  [(parallel [(set (match_operand   0  )
-	   (call (match_operand 1 memory_operand )
-		 (match_operand 2 general_operand )))
-	  (use (match_operand 3  ))
-	  (clobber (reg:SI LR_REGNUM))])])
-
 (define_insn *call_value_reg_armv5
   [(set (match_operand 0  )
 (call (mem:SI (match_operand:SI 1 s_register_operand r))
diff --git a/gcc/genemit.c b/gcc/genemit.c
index faaa610..aff27f6 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -422,7 +422,8 @@ gen_expand (rtx expand)
   /* If we don't have any C code to write, only one insn is being written,
  and no MATCH_DUPs are present, we can just return the desired insn
  like we do for a DEFINE_INSN.  This saves memory.  */
-  if ((XSTR 

Re: Add post_expand_call_insn hook

2014-04-29 Thread Tom de Vries

On 29-04-14 20:56, Richard Henderson wrote:

I've written this concept patch, which tries to address the same problem, but
in a different (and I hope more generic) way.

It adds a post-emission C-code operand to define_expand.

As an example of how this could be useful, for the define_expand of call and
call_value in the arm target, I'm using the new operand to do the post-emit
call processing done currently in arm_emit_call_insn. This allows us to
eliminate the call_internal and call_value_internal define_expands, and
simplifies the call and call_value define_expands.


Is this patch really any better?  I can't see that it is myself.  It seems to
me that the existing mechanism to emit the call, then append to FUNCTION_USAGE
is perfectly clear.  This new argument to define_expand seems less clear.

What are you trying to fix, anyway?


Richard,

In arm.md, the define_expand call rtl template is not used, because DONE is 
used in the preparation statements operand. The DONE is there, because we need a 
handle to the emitted insn to do post-emit processing.


The post-emission C-code operand that this patch introduces provides a handle to 
the emitted insn, which means we no longer need to use an explicit emit and DONE 
to get that handle.  And without that DONE, we can use the rtl template of 
define_expand call, and no longer need the call_internal.


So we eliminate a define_expand (which is shorter, and removes duplicate code), 
and deal with expansion inside a single define_expand (which is clearer). To me, 
those are the benefits.


I agree the existing mechanism is perfectly clear.

Still I think that a 'finalization statements' operand is as clear as a 
'preparation statements' operand.


Thanks,
- Tom


[PING] -fuse-caller-save - Collect register usage information

2014-05-12 Thread Tom de Vries

On 26-04-14 14:51, Tom de Vries wrote:

Eric,
Honza,

This patch adds analysis in pass_final to track which hard registers are set or
clobbered by the function body, and stores that information in a
struct cgraph_node, to be used in the fuse-caller-save optimization.

This is the updated version of the previously approved patch
submitted here (http://gcc.gnu.org/ml/gcc-patches/2013-03/msg01320.html ).
The changes are:
- using a new hook call_fusage_contains_non_callee_clobbers,
- incorporating minor review comments from Richard Sandiford
   ( http://gcc.gnu.org/ml/gcc-patches/2014-04/msg01436.html ).

As part of the fuse-caller-save patch series, bootstrapped and reg-tested on
x86_64, and build and reg-tested on MIPS.

Eric, non-cgraph part OK for trunk?

Honza, cgraph part OK for trunk?



Ping. If this patch is approved and committed, I can commit the other approved 
fuse-caller-save patches and enable the optimization for MIPS.


Thanks,
- Tom



Re: -fuse-caller-save - Collect register usage information

2014-05-19 Thread Tom de Vries

On 17-05-14 12:51, Eric Botcazou wrote:

This is the updated version of the previously approved patch
submitted here (http://gcc.gnu.org/ml/gcc-patches/2013-03/msg01320.html ).
The changes are:
- using a new hook call_fusage_contains_non_callee_clobbers,
- incorporating minor review comments from Richard Sandiford
( http://gcc.gnu.org/ml/gcc-patches/2014-04/msg01436.html ).

As part of the fuse-caller-save patch series, bootstrapped and reg-tested on
x86_64, and build and reg-tested on MIPS.

Eric, non-cgraph part OK for trunk?




Eric,

thanks for the review.


I think we should consider creating a new rule: for every target hook added,
another must be first removed...

So this call_fusage_contains_non_callee_clobbers is essentially only a stop
gap measure for the ports that haven't been changed yet?


I think so.

 If so, please add a

??? comment at the beginning of collect_fn_hard_reg_usage:

   /* ??? To be removed when all the ports have been fixed.  */
   if (!targetm.call_fusage_contains_non_callee_clobbers)



Done.


and invoke collect_fn_hard_reg_usage from rest_of_handle_final only when
flag_use_caller_save is true.



Done.


Why do you need to retest them in get_call_reg_set_usage and get_call_fndecl?



The test for flag_use_caller_save in get_call_fndecl was unnecessary, I've 
removed it.


The test in get_call_reg_set_usage for flag_use_caller_save and the hook is 
strictly speaking not necessary. But it's the interface function to retrieve the 
collected register usage information, so it seems a good location to do an 
early-out. I've left it in for now.


Bootstrapped and reg-tested on x86_64.

non-cgraph part OK for trunk?

Thanks,
 - Tom
2014-05-19  Radovan Obradovic  robrado...@mips.com
Tom de Vries  t...@codesourcery.com

	* cgraph.h (struct cgraph_node): Add function_used_regs,
	function_used_regs_initialized and function_used_regs_valid fields.
	* final.c: Move include of hard-reg-set.h to before rtl.h to declare
	find_all_hard_reg_sets.
	(collect_fn_hard_reg_usage, get_call_fndecl, get_call_cgraph_node)
	(get_call_reg_set_usage): New functions.
	(rest_of_handle_final): Use collect_fn_hard_reg_usage.
---
 gcc/cgraph.h |   7 
 gcc/final.c  | 115 ++-
 gcc/regs.h   |   4 +++
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 84fc1d9..d1f 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -408,6 +408,13 @@ public:
   /* Time profiler: first run of function.  */
   int tp_first_run;
 
+  /* Call unsaved hard registers really used by the corresponding
+ function (including ones used by functions called by the
+ function).  */
+  HARD_REG_SET function_used_regs;
+  /* Set if function_used_regs is valid.  */
+  unsigned function_used_regs_valid: 1;
+
   /* Set when decl is an abstract function pointed to by the
  ABSTRACT_DECL_ORIGIN of a reachable function.  */
   unsigned used_as_abstract_origin : 1;
diff --git a/gcc/final.c b/gcc/final.c
index 3271430..e747b80 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include tree.h
 #include varasm.h
+#include hard-reg-set.h
 #include rtl.h
 #include tm_p.h
 #include regs.h
@@ -57,7 +58,6 @@ along with GCC; see the file COPYING3.  If not see
 #include recog.h
 #include conditions.h
 #include flags.h
-#include hard-reg-set.h
 #include output.h
 #include except.h
 #include function.h
@@ -224,6 +224,7 @@ static int alter_cond (rtx);
 static int final_addr_vec_align (rtx);
 #endif
 static int align_fuzz (rtx, rtx, int, unsigned);
+static void collect_fn_hard_reg_usage (void);
 
 /* Initialize data in final at the beginning of a compilation.  */
 
@@ -4442,6 +4443,8 @@ rest_of_handle_final (void)
   assemble_start_function (current_function_decl, fnname);
   final_start_function (get_insns (), asm_out_file, optimize);
   final (get_insns (), asm_out_file, optimize);
+  if (flag_use_caller_save)
+collect_fn_hard_reg_usage ();
   final_end_function ();
 
   /* The IA-64 .handlerdata directive must be issued before the .endp
@@ -4740,3 +4743,113 @@ make_pass_clean_state (gcc::context *ctxt)
 {
   return new pass_clean_state (ctxt);
 }
+
+/* Collect hard register usage for the current function.  */
+
+static void
+collect_fn_hard_reg_usage (void)
+{
+  rtx insn;
+  int i;
+  struct cgraph_node *node;
+
+  /* ??? To be removed when all the ports have been fixed.  */
+  if (!targetm.call_fusage_contains_non_callee_clobbers)
+return;
+
+  node = cgraph_get_node (current_function_decl);
+  gcc_assert (node != NULL);
+
+  for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn))
+{
+  HARD_REG_SET insn_used_regs;
+
+  if (!NONDEBUG_INSN_P (insn))
+	continue;
+
+  find_all_hard_reg_sets (insn, insn_used_regs, false);
+
+  if (CALL_P (insn)
+	   !get_call_reg_set_usage (insn, insn_used_regs, call_used_reg_set

Don't dump low gimple functions in gimple dump

2014-05-20 Thread Tom de Vries
Honza,

Consider this program:
...
int
main(void)
{
#pragma omp parallel
  {
extern void foo(void);
foo ();
  }
  return 0;
}
...

When compiling this program with -fopenmp, the ompexp pass splits off a new
function called main._omp_fn.0 containing the call to foo.  The new function is
then dumped into the gimple dump by analyze_function.

There are two problems with this:
- the new function is in low gimple, and is dumped next to high gimple
  functions
- since it's already low, the new function is not lowered, and 'goes missing'
  in the dumps following the gimple dump, until it reappears again after the
  last lowering dump.
  [ http://gcc.gnu.org/ml/gcc/2014-03/msg00312.html ]

This patch fixes the problems by ensuring that analyze_function only dumps the
new function to the gimple dump after gimplification (specifically, by moving
the dump_function call into gimplify_function_tree.  That makes the call to
dump_function in finalize_size_functions superfluous).

That also requires us to add a call to dump_function in finalize_task_copyfn,
where we split off a new high gimple function.

And in expand_omp_taskreg and expand_omp_target, where we split off a new low
gimple function, we now dump the new function into the current (ompexp) dump
file, which is the last lowering dump.

Finally, we dump an information statement at the start of
cgraph_add_new_function to give a better idea when and what kind of function is
created.

Bootstrapped and reg-tested on x86_64.

OK for trunk ?

Thanks,
- Tom
2014-05-19  Tom de Vries  t...@codesourcery.com

	* cgraphunit.c (cgraph_add_new_function): Dump message on new function.
	(analyze_function): Don't dump function to gimple dump file.
	* gimplify.c: Add tree-dump.h include.
	(gimplify_function_tree): Dump function to gimple dump file.
	* omp-low.c: Add tree-dump.h include.
	(finalize_task_copyfn): Dump new function to gimple dump file.
	(expand_omp_taskreg, expand_omp_target): Dump new function to dump file.
	* stor-layout.c (finalize_size_functions): Don't dump function to gimple
	dump file.

	* gcc.dg/gomp/dump-task.c: New test.
---
 gcc/cgraphunit.c  | 15 ++-
 gcc/gimplify.c|  3 +++
 gcc/omp-low.c |  6 ++
 gcc/stor-layout.c |  1 -
 gcc/testsuite/gcc.dg/gomp/dump-task.c | 33 +
 5 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/gomp/dump-task.c

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 9b51135..2ff4079 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -491,6 +491,20 @@ cgraph_add_new_function (tree fndecl, bool lowered)
 {
   gcc::pass_manager *passes = g-get_passes ();
   struct cgraph_node *node;
+
+  if (dump_file)
+{
+  const char *function_type = ((gimple_has_body_p (fndecl))
+   ? (lowered
+  ? low gimple
+  : high gimple)
+   : to-be-gimplified);
+  fprintf (dump_file,
+	   Added new %s function %s to callgraph\n,
+	   function_type,
+	   fndecl_name (fndecl));
+}
+
   switch (cgraph_state)
 {
   case CGRAPH_STATE_PARSING:
@@ -647,7 +661,6 @@ analyze_function (struct cgraph_node *node)
 	 body.  */
   if (!gimple_has_body_p (decl))
 	gimplify_function_tree (decl);
-  dump_function (TDI_generic, decl);
 
   /* Lower the function.  */
   if (!node-lowered)
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 3241633..065bf2c 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -59,6 +59,7 @@ along with GCC; see the file COPYING3.  If not see
 #include omp-low.h
 #include gimple-low.h
 #include cilk.h
+#include tree-dump.h
 
 #include langhooks-def.h	/* FIXME: for lhd_set_decl_assembler_name */
 #include tree-pass.h		/* FIXME: only for PROP_gimple_any */
@@ -8864,6 +8865,8 @@ gimplify_function_tree (tree fndecl)
   cfun-curr_properties = PROP_gimple_any;
 
   pop_cfun ();
+
+  dump_function (TDI_generic, fndecl);
 }
 
 /* Return a dummy expression of type TYPE in order to keep going after an
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a2a64ad..0c497e9 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -71,6 +71,7 @@ along with GCC; see the file COPYING3.  If not see
 #include ipa-prop.h
 #include tree-nested.h
 #include tree-eh.h
+#include tree-dump.h
 
 
 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -1396,6 +1397,7 @@ finalize_task_copyfn (gimple task_stmt)
   pop_cfun ();
 
   /* Inform the callgraph about the new function.  */
+  dump_function (TDI_generic, child_fn);
   cgraph_add_new_function (child_fn, false);
 }
 
@@ -4843,6 +4845,8 @@ expand_omp_taskreg (struct omp_region *region)
   /* Inform the callgraph about the new function.  */
   DECL_STRUCT_FUNCTION (child_fn)-curr_properties = cfun-curr_properties;
   cgraph_add_new_function (child_fn, true);
+  if (dump_file)
+	dump_function_to_file (child_fn, dump_file, dump_flags

Re: -fuse-caller-save - Collect register usage information

2014-05-29 Thread Tom de Vries

On 29-05-14 00:42, Bill Schmidt wrote:

Tom, the final version of this patch that you committed breaks bootstrap
on powerpc64le-linux-gnu.  The problem is that all uses of the variable
i are guarded by #ifdef STACK_REGS, but the declaration of i is
unconditional.  We get an unused variable warning that becomes an error
during stage 3.



Bill,

thanks for letting me know.

I've bootstrapped attached patch on x86_64, and committed it.

Thanks,
- Tom

2014-05-29  Tom de Vries  t...@codesourcery.com

	* final.c (collect_fn_hard_reg_usage): Guard variable declaration
	with #ifdef STACK_REGS.

diff --git a/gcc/final.c b/gcc/final.c
index a345fe7..c32e177 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -4750,7 +4750,9 @@ static void
 collect_fn_hard_reg_usage (void)
 {
   rtx insn;
+#ifdef STACK_REGS
   int i;
+#endif
   struct cgraph_rtl_info *node;
 
   /* ??? To be removed when all the ports have been fixed.  */


Re: [PATCH 7/7] Move SYMBOL_REF_FLAGS to rtx header

2014-05-29 Thread Tom de Vries

On 10-05-14 22:24, Richard Sandiford wrote:

  /* A set of flags on a symbol_ref that are, in some respects, redundant with
 information derivable from the tree decl associated with this symbol.
@@ -1791,7 +1794,9 @@ #define SYMBOL_REF_CONSTANT(RTX) \
 this information to avoid recomputing it.  Finally, this allows space for
 the target to store more than one bit of information, as with
 SYMBOL_REF_FLAG.  */
-#define SYMBOL_REF_FLAGS(RTX)  X0INT ((RTX), 1)
+#define SYMBOL_REF_FLAGS(RTX) \
+  (RTL_FLAG_CHECK1 (SYMBOL_REF_FLAGS, (RTX), SYMBOL_REF) \
+   -u2.symbol_ref_flags)



Richard,

with an arm-linux-gnueabi non-bootstrap build with --enable-checking=yes,rtl, I 
ran into the following error:

...
/home/vries/gcc_versions/devel/src/libgcc/libgcc2.c:819:1: internal compiler 
error: RTL check: attempt to treat non-block symbol as a block symbol in 
create_block_symbol, at varasm.c:394

 };
 ^
0xc3c16b rtl_check_failed_block_symbol(char const*, int, char const*)
/home/vries/gcc_versions/devel/src/gcc/rtl.c:844
0x103c09d create_block_symbol
/home/vries/gcc_versions/devel/src/gcc/varasm.c:394
0x103f42d make_decl_rtl(tree_node*)
/home/vries/gcc_versions/devel/src/gcc/varasm.c:1379
0x103fc87 notice_global_symbol(tree_node*)
/home/vries/gcc_versions/devel/src/gcc/varasm.c:1552
0x7588bf varpool_finalize_decl(tree_node*)
/home/vries/gcc_versions/devel/src/gcc/cgraphunit.c:823
0xb4eaa0 rest_of_decl_compilation(tree_node*, int, int)
/home/vries/gcc_versions/devel/src/gcc/passes.c:241
0x5902c4 finish_decl(tree_node*, unsigned int, tree_node*, tree_node*, 
tree_node*)
/home/vries/gcc_versions/devel/src/gcc/c/c-decl.c:4521
0x5e8586 c_parser_declaration_or_fndef
/home/vries/gcc_versions/devel/src/gcc/c/c-parser.c:1782
0x5e7644 c_parser_external_declaration
/home/vries/gcc_versions/devel/src/gcc/c/c-parser.c:1399
0x5e72c7 c_parser_translation_unit
/home/vries/gcc_versions/devel/src/gcc/c/c-parser.c:1286
0x606c6d c_parse_file()
/home/vries/gcc_versions/devel/src/gcc/c/c-parser.c:14077
0x66b7fa c_common_parse_file()
/home/vries/gcc_versions/devel/src/gcc/c-family/c-opts.c:1067
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.
...

It looks like BLOCK_SYMBOL_CHECK hasn't been updated.

Patch below fixes it for me. OK for trunk if bootstrap on x86_64 succeeds?

Thanks,
- Tom


2014-05-29  Tom de Vries  t...@codesourcery.com

	* rtl.h (BLOCK_SYMBOL_CHECK): Use SYMBOL_REF_FLAGS.
---
 gcc/rtl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index 02ce424..51cfae5 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -708,7 +708,7 @@ struct GTY(()) rtvec_def {
 
 #define BLOCK_SYMBOL_CHECK(RTX) __extension__\
 ({ __typeof (RTX) const _symbol = (RTX);\
-   const unsigned int flags = RTL_CHECKC1 (_symbol, 1, SYMBOL_REF).rt_int; \
+   const unsigned int flags = SYMBOL_REF_FLAGS (_symbol);		\
if ((flags  SYMBOL_FLAG_HAS_BLOCK_INFO) == 0)			\
  rtl_check_failed_block_symbol (__FILE__, __LINE__,			\
 __FUNCTION__);			\
-- 
1.9.1



  1   2   3   4   5   6   7   8   9   10   >