Re: [AArch64] Fall back to -fPIC if no support of -fpic relocation modifier in assembler

2015-06-30 Thread Andreas Schwab
Jiong Wang jiong.w...@arm.com writes:

 gcc/
   * configure.ac: Add check for aarch64 assembler -fpic relocation
   modifier support.
   * configure: Regenerate.
   * config.in: Regenerate.
   * config/aarch64/aarch64.c (initialize_aarch64_code_model): Fall back to
   -fPIC if not support of -fpic relocation modifier in assembler.

This fixes all testsuite regressions except this one:

gcc.target/aarch64/pic-small.c scan-assembler-times adrp\tx[0-9]+, 
_GLOBAL_OFFSET_TABLE 2

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
And now for something completely different.


Re: [PATCH 1/2] Allow REG_EQUAL for ZERO_EXTRACT

2015-06-30 Thread Maxim Kuvyrkov
 On Jun 30, 2015, at 6:54 AM, Kugan kugan.vivekanandara...@linaro.org wrote:
 
 
 On 29/06/15 21:56, Maxim Kuvyrkov wrote:
 On Jun 28, 2015, at 2:28 PM, Kugan kugan.vivekanandara...@linaro.org 
 wrote:
 
 This patch allows setting REG_EQUAL for ZERO_EXTRACT and handle that in
 cse (where the src for the ZERO_EXTRACT needs to be calculated)
 
 Thanks,
 Kugan
 
 From 75e746e559ffd21b25542b3db627e3b318118569 Mon Sep 17 00:00:00 2001
 From: Kugan Vivekanandarajah kugan.vivekanandara...@linaro.org
 Date: Fri, 26 Jun 2015 17:12:07 +1000
 Subject: [PATCH 1/2] Allow adding REG_EQUAL for ZERO_EXTRACT
 
 ---
 gcc/ChangeLog  |  6 ++
 gcc/cse.c  | 41 -
 gcc/emit-rtl.c |  3 ++-
 3 files changed, 48 insertions(+), 2 deletions(-)
 
 diff --git a/gcc/ChangeLog b/gcc/ChangeLog
 index 080aa39..d4a73d6 100644
 --- a/gcc/ChangeLog
 +++ b/gcc/ChangeLog
 @@ -1,3 +1,9 @@
 +2015-06-26  Kugan Vivekanandarajah  kug...@linaro.org
 +
 +   * cse.c (cse_insn): Calculate src_eqv for ZERO_EXTRACT.
 +   * emit-rtl.c (set_for_reg_notes): Allow ZERO_EXTRACT to set
 +   REG_EQUAL note.
 +
 2015-06-25  H.J. Lu  hongjiu...@intel.com
 
 * gentarget-def.c (def_target_insn): Cast return of strtol to
 diff --git a/gcc/cse.c b/gcc/cse.c
 index 100c9c8..8add651 100644
 --- a/gcc/cse.c
 +++ b/gcc/cse.c
 @@ -4531,8 +4531,47 @@ cse_insn (rtx_insn *insn)
   if (n_sets == 1  REG_NOTES (insn) != 0
(tem = find_reg_note (insn, REG_EQUAL, NULL_RTX)) != 0
(! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl))
 + || GET_CODE (SET_DEST (sets[0].rtl)) == ZERO_EXTRACT
   || GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART))
 -src_eqv = copy_rtx (XEXP (tem, 0));
 +{
 +  src_eqv = copy_rtx (XEXP (tem, 0));
 +
 +  /* If DEST is of the form ZERO_EXTACT, as in:
 +(set (zero_extract:SI (reg:SI 119)
 + (const_int 16 [0x10])
 + (const_int 16 [0x10]))
 + (const_int 51154 [0xc7d2]))
 +REG_EQUAL note will specify the value of register (reg:SI 119) at this
 +point.  Note that this is different from SRC_EQV. We can however
 +calculate SRC_EQV with the position and width of ZERO_EXTRACT.  */
 +  if (GET_CODE (SET_DEST (sets[0].rtl)) == ZERO_EXTRACT)
 
 Consider changing
 
 if (something
 (!rtx_equal_p)
|| ZERO_EXTRACT
|| STRICT_LOW_PART)
 
 to 
 
 if (something
 !rtx_equal_p)
  {
 if (ZERO_EXTRACT)
   {
   }
 else if (STRICT_LOW_PART)
   {
   }
  }
 
 Otherwise looks good to me, but you still need another approval.
 
 Thanks Maxim for the review. How about the attached patch?

Looks good, with a couple of indentation nit-picks below.  No need to repost 
the patch on their account.  Wait for another maintainer's review.

 --- a/gcc/cse.c
 +++ b/gcc/cse.c
 @@ -4525,14 +4525,49 @@ cse_insn (rtx_insn *insn)
canonicalize_insn (insn, sets, n_sets);
  
/* If this insn has a REG_EQUAL note, store the equivalent value in 
 SRC_EQV,
 - if different, or if the DEST is a STRICT_LOW_PART.  The latter condition
 - is necessary because SRC_EQV is handled specially for this case, and if
 - it isn't set, then there will be no equivalence for the destination.  */
 + if different, or if the DEST is a STRICT_LOW_PART/ZERO_EXTRACT.  The
 + latter condition is necessary because SRC_EQV is handled specially for
 + this case, and if it isn't set, then there will be no equivalence
 + for the destination.  */
if (n_sets == 1  REG_NOTES (insn) != 0
 -   (tem = find_reg_note (insn, REG_EQUAL, NULL_RTX)) != 0
 -   (! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl))
 -   || GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART))
 -src_eqv = copy_rtx (XEXP (tem, 0));
 +   (tem = find_reg_note (insn, REG_EQUAL, NULL_RTX)) != 0)
 +{
 +  if ((! rtx_equal_p (XEXP (tem, 0), SET_SRC (sets[0].rtl)))
 +   || GET_CODE (SET_DEST (sets[0].rtl)) == STRICT_LOW_PART)
 +  src_eqv = copy_rtx (XEXP (tem, 0));

Please double check indentation here.

 +
 +  /* If DEST is of the form ZERO_EXTACT, as in:
 +  (set (zero_extract:SI (reg:SI 119)
 +   (const_int 16 [0x10])
 +   (const_int 16 [0x10]))
 +   (const_int 51154 [0xc7d2]))
 +  REG_EQUAL note will specify the value of register (reg:SI 119) at this
 +  point.  Note that this is different from SRC_EQV. We can however
 +  calculate SRC_EQV with the position and width of ZERO_EXTRACT.  */
 +  else if (GET_CODE (SET_DEST (sets[0].rtl)) == ZERO_EXTRACT
 +CONST_INT_P (src_eqv)

Add a space between && and CONST_INT_P.

 + CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 1))
 + CONST_INT_P (XEXP (SET_DEST (sets[0].rtl), 2)))
 + {
 +   rtx dest_reg = XEXP (SET_DEST (sets[0].rtl), 0);
 +   rtx width = XEXP (SET_DEST (sets[0].rtl), 1);
 +   rtx pos = XEXP (SET_DEST (sets[0].rtl), 2);
 +   HOST_WIDE_INT val = 

Re: [Patch, MIPS] Enable fp-contract on MIPS and update -mfused-madd

2015-06-30 Thread Richard Biener
On Mon, Jun 29, 2015 at 6:08 PM, Maciej W. Rozycki ma...@linux-mips.org wrote:
 Richard, please have a look at my question below in a reference to your
 previous statement.

 On Thu, 18 Jun 2015, Steve Ellcey wrote:

 OK, I checked in the prequel patch and here is a new copy of the
 original patch based off of that (and with no HONOR_NAN checks in the
 fma/madd instructions).

 OK for checkin?

  Please see below for my notes.

 2015-06-18  Steve Ellcey  sell...@imgtec.com

   * config.gcc (mips*-*-*): Add fused-madd.opt.

  Please use angle brackets as per
 https://www.gnu.org/prep/standards/html_node/Indicating-the-Part-Changed.html,
 i.e.:

 * config.gcc <mips*-*-*>: Add fused-madd.opt.

 There's no function or similar entity involved here and `mips*-*-*' is a
 case value like with the C language's `switch' statement where you'd use
 angle brackets too to refer to individual cases.

   (*nmsub4mode_fastmath)  Update condition.

  Extraneous space here.

 diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
 index f6912e1..4f5692c 100644
 --- a/gcc/config/mips/mips.md
 +++ b/gcc/config/mips/mips.md
 [...]
 +;; fnma is defined in GCC as (fma (neg op1) op2 op3)
 +;; (-op1 * op2) + op3 == -(op1 * op2) + op3 == -((op1 * op2) - op3)
 +;; The mips nmsub instructions implement -((op1 * op2) - op3)
 +;; This transformation means we may return the wrong signed zero
 +;; so we check HONOR_SIGNED_ZEROS.
 +
 +(define_expand fnmamode4
 +  [(set (match_operand:ANYF 0 register_operand)
 + (fma:ANYF (neg:ANYF (match_operand:ANYF 1 register_operand))
 +   (match_operand:ANYF 2 register_operand)
 +   (match_operand:ANYF 3 register_operand)))]
 +  (ISA_HAS_FUSED_MADD3 || ISA_HAS_FUSED_MADD4)
 +!HONOR_SIGNED_ZEROS (MODEmode))

  Have you considered the alternative/complementary approach proposed by
 Richard here: http://gcc.gnu.org/ml/gcc-patches/2010-11/msg00680.html,
 i.e. to introduce further expanders, e.g.:

 fmanM4: (neg:M (fma:M OP1 OP2 OP3)) (multiply-add, negated)

 fmsnM4: (neg:M (fma:M OP1 OP2 (neg:M OP3))) (multiply-subtract, negated)

 ?

  These patterns wouldn't need a check for !HONOR_SIGNED_ZEROS as they
 match the respective hardware instructions in an exact manner.  Therefore
 I think they would be more useful as they would also suit software that
 claims/requires full IEEE Std 754 compliance.

  Richard, do you maintain the introduction of these additional operations
 would be a good idea and one you're willing to support for the purpose of
 patch acceptance/approval if implemented?

Yes, esp. if there is now a second architecture that has such instructions.

Thanks,
Richard.

 +;; fnms is defined as: (fma (neg op1) op2 (neg op3))
 +;; ((-op1) * op2) - op3 == -(op1 * op2) - op3 == -((op1 * op2) + op3)
 +;; The mips nmadd instructions implement -((op1 * op2) + op3)
 +;; This transformation means we may return the wrong signed zero
 +;; so we check HONOR_SIGNED_ZEROS.
 +
 +(define_expand fnmsmode4
 +  [(set (match_operand:ANYF 0 register_operand)
 + (fma:ANYF
 +   (neg:ANYF (match_operand:ANYF 1 register_operand))
 +   (match_operand:ANYF 2 register_operand)
 +   (neg:ANYF (match_operand:ANYF 3 register_operand]
 +  (ISA_HAS_FUSED_MADD3 || ISA_HAS_FUSED_MADD4)
 +!HONOR_SIGNED_ZEROS (MODEmode))

  Same observation here.

  The change looks good to me otherwise.

   Maciej


Re: [RFA] Factor conversion out of COND_EXPR using match.pd pattern

2015-06-30 Thread Richard Biener
On Mon, Jun 29, 2015 at 7:51 PM, Jeff Law l...@redhat.com wrote:
 On 06/01/2015 04:55 AM, Richard Biener wrote:

 On Sat, May 30, 2015 at 11:11 AM, Marc Glisse marc.gli...@inria.fr
 wrote:

 (only commenting on the technique, not on the transformation itself)

 +(simplify
 +  (cond @0 (convert @1) INTEGER_CST@2)
 +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
 +COMPARISON_CLASS_P (@0)



 If you add COMPARISON_CLASS_P to define_predicates, you could write:
 (cond COMPARISON_CLASS_P@0 (convert @1) INTEGER_CST@2)


 But that would fail to match on GIMPLE, so I don't like either variant
 as Jeff's relies on the awkward fact that on GIMPLE cond expr conditions
 are GENERIC and yours wouldn't work.

 That said - for this kind of patterns testcases that exercise the patterns
 on GIMPLE would be very appreciated.

 It may be the case that these patterns don't make a lot of sense on gimple
 and should be restricted to generic, at least with our current
 infrastructure.

 The problem is when we lower from generic to gimple, we end up with branchy
 code, not straight line code and there's no good way I see to write a
 match.pd pattern which encompasses flow control.

 So to discover the MIN/MAX with typecast, we're probably stuck hacking
 minmax_replacement to know when it can ignore the typecast.

 That may in fact be a good thing -- I haven't looked closely yet, but 45397
 may be of a similar nature (no good chance to see straight line code for
 match.pd, thus we're forced to handle it in phiopt).


 So do we want to restrict the new pattern on GENERIC, then rely on phiopt to
 get smarter and catch this stuff for GIMPLE?  Or drop the pattern totally
 and do everything in phiopt on GIMPLE?

Note that IMHO it doesn't make a lot of sense to add match.pd patterns
restricted
to GENERIC - those should simply go to / stay in fold-const.c.  For patterns
restricted to GIMPLE match.pd is still the proper place.

As for matching control flow it's actually not that difficult to get
it working at
least for toplevel COND_EXPRs.  The trick is to match on the PHI nodes
(like phiopt does), thus for

  if (cond)
...
  _3 = PHI <_4, _5>

ask the match-and-simplify machinery

  if (gimple_simplify (COND_EXPR, TREE_TYPE (_3), cond, _4, _5, ...))

which will then for example match

(simplify
 (cond (gt @0 @1) @0 @1)
 (max @0 @1))

for non-toplevel COND_EXPRs we'd need to adjust the matching code itself
to handle PHI defs.

Of course with this there are several complications arising.  One is cost
as the conditional might not go away (other code may be control dependent
on it).  One is SSA form - if you get complicated enough patterns you
might end up substituting a value into the result that is computed in a place
that does not dominate the PHI result (so you'd need to insert a PHI node
for it and figure out a value for the other edges ... which might mean such
patterns would be invalid anyway?).

So it's indeed not clear if re-writing phiopt to match.pd patterns is possible
or desirable.


 or maybe use a for loop on comparisons, which would give names to
 TREE_OPERAND (@0, *). This should even handle the operand_equal_p
 alternative:

 (cond (cmp:c@0 @1 @2) (convert @1) INTEGER_CST@2)


 Yes, that would be my reference.

 But won't this require pointer equivalence?  Are INTEGER_CST nodes fully
 shared?  What if @1 is something more complex than a _DECL node (remember,
 we're working with GENERIC).  So something like
 (cond (cmp:c@0 @1 @2) (convert @3) INTEGER_CST@4))

 And using operand_equal_p seems more appropriate to me (and is still better
 than the original (cond @0 ...) and grubbing around inside @0 to look at
 operands.

We do use operand_equal_p to query whether @0 and @0 are equal.


 +int_fits_type_p (@2, TREE_TYPE (@1))
 +((operand_equal_p (TREE_OPERAND (@0, 0), @2, 0)
 +operand_equal_p (TREE_OPERAND (@0, 1), @1, 0))
 +  || (operand_equal_p (TREE_OPERAND (@0, 0), @1, 0)
 +   operand_equal_p (TREE_OPERAND (@0, 1), @2, 0
 +(with { tree itype = TREE_TYPE (@1); tree otype = TREE_TYPE (@2); }
 +  (convert:otype (cond:itype @0 @1 (convert:itype @2))



 This should be enough, no need to specify the outer type
 (convert (cond:itype @0 @1 (convert:itype @2))


 Yes.

 I believe we should not have to write cond:itype here, cond should be
 made
 to use the type of its second argument instead of the first one, by
 default
 (expr::gen_transform already has a few special cases).


 Indeed.  Patch welcome (I'd have expected it already works...)

 With Marc's fix, those explicit types are no longer needed.

Good.

Richard.

 Jeff


Re: [PATCH, PR66652] Use max_loop_iterations in transform_to_exit_first_loop_alt

2015-06-30 Thread Tom de Vries

On 29/06/15 19:58, Jeff Law wrote:

On 06/29/2015 08:24 AM, Tom de Vries wrote:

Hi,

this patch fixes PR66652.

It uses max_loop_iterations in transform_to_exit_first_loop_alt to
ensure that the new loop bound nit + 1 doesn't overflow.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom

0001-Use-max_loop_iterations-in-transform_to_exit_first_l.patch


Use max_loop_iterations in transform_to_exit_first_loop_alt

2015-06-29  Tom de Vriest...@codesourcery.com

PR tree-optimization/66652
* tree-parloops.c (try_transform_to_exit_first_loop_alt): Use
max_loop_iterations to determine if nit + 1 overflows.

* testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (f): Rewrite
using restrict pointers.
(main): Add arguments to calls to f.
* testsuite/libgomp.c/parloops-exit-first-loop-alt.c: Same.

* gcc.dg/parloops-exit-first-loop-alt-pr66652.c: New test.
* gcc.dg/parloops-exit-first-loop-alt-3.c (f):  Rewrite using
restrict
pointers.
* gcc.dg/parloops-exit-first-loop-alt.c: Same.

OK.

Make sure to put the PR marker in the testsuite/ChangeLog entry


True, I abbreviate it in the log, but my commit script ( 
https://github.com/vries/bin-scripts/blob/master/git-prepare-gnu-commit.sh 
) applies the PR line to all the ChangeLog hunks.



and drop
the testsuite/ prefix in the testsuite/ChangeLog entry.



The testsuite/ prefix is used because it's for the ChangeLog in dir 
libgomp.


Committed as attached:
- fixed typo in comment
- updated test-case modifications to be applicable on trunk (given that
  there's no approval yet for the fix for PR66642, see
  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01768.html)
- included ChangeLog file changes, given the ChangeLog questions.

Thanks,
- Tom
Use max_loop_iterations in transform_to_exit_first_loop_alt

2015-06-30  Tom de Vries  t...@codesourcery.com

	PR tree-optimization/66652
	* tree-parloops.c (try_transform_to_exit_first_loop_alt): Use
	max_loop_iterations to determine if nit + 1 overflows.

	* testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (f): Rewrite
	using restrict pointers.
	(main): Add arguments to calls to f.
	* testsuite/libgomp.c/parloops-exit-first-loop-alt.c: Same.

	* gcc.dg/parloops-exit-first-loop-alt-pr66652.c: New test.
	* gcc.dg/parloops-exit-first-loop-alt-3.c (f):  Rewrite using restrict
	pointers.
	* gcc.dg/parloops-exit-first-loop-alt.c: Same.
---
 gcc/ChangeLog  |  6 +
 gcc/testsuite/ChangeLog|  8 ++
 .../gcc.dg/parloops-exit-first-loop-alt-3.c|  2 +-
 .../gcc.dg/parloops-exit-first-loop-alt-pr66652.c  | 31 ++
 .../gcc.dg/parloops-exit-first-loop-alt.c  | 19 +
 gcc/tree-parloops.c| 31 ++
 libgomp/ChangeLog  |  8 ++
 .../libgomp.c/parloops-exit-first-loop-alt-3.c |  4 +--
 .../libgomp.c/parloops-exit-first-loop-alt.c   |  5 ++--
 9 files changed, 97 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 96f4f8d..1940d04 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2015-06-30  Tom de Vries  t...@codesourcery.com
+
+	PR tree-optimization/66652
+	* tree-parloops.c (try_transform_to_exit_first_loop_alt): Use
+	max_loop_iterations to determine if nit + 1 overflows.
+
 2015-06-30  Bin Cheng  bin.ch...@arm.com
 
 	* tree-ssa-loop-ivopts.c (record_sub_use): Don't reset ssa_name
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9c2f20b..f249a7d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2015-06-30  Tom de Vries  t...@codesourcery.com
+
+	PR tree-optimization/66652
+	* gcc.dg/parloops-exit-first-loop-alt-pr66652.c: New test.
+	* gcc.dg/parloops-exit-first-loop-alt-3.c (f):  Rewrite using restrict
+	pointers.
+	* gcc.dg/parloops-exit-first-loop-alt.c: Same.
+
 2015-06-29  Paolo Carlini  paolo.carl...@oracle.com
 
 	PR c++/65977
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
index b0fde37..fec53a1 100644
--- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c
@@ -7,7 +7,7 @@
 unsigned int *a;
 
 unsigned int
-f (unsigned int n)
+f (unsigned int n, unsigned int *__restrict__ a)
 {
   int i;
   unsigned int sum = 1;
diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c
new file mode 100644
index 000..2ea097d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target pthread } */
+/* { dg-options -O2 -ftree-parallelize-loops=2 -fdump-tree-parloops } */
+
+#include stdio.h

Re: [patch] fix regrename pass to ensure renamings produce valid insns

2015-06-30 Thread Eric Botcazou
 I notice the way gcc_assert() is defined in system.h now, the test won't
 disappear even when runtime checks are disabled, though you might still
 adjust it to avoid any programmer confusion.

It will disappear at run time, see the definition:

/* Include EXPR, so that unused variable warnings do not occur.  */
#define gcc_assert(EXPR) ((void)(0 && (EXPR)))

so you really need to use a separate variable.

-- 
Eric Botcazou


Re: [PATCH] Move X - (X / Y) * Y folding to match.pd

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marc Glisse wrote:

 On Mon, 29 Jun 2015, Marek Polacek wrote:
 
  On Mon, Jun 29, 2015 at 09:36:59AM +0200, Richard Biener wrote:
Anything wrong with this?

+/* X - (X / Y) * Y is the same as X % Y.  */
+(simplify
+ (minus (convert? @0) (convert? (mult (trunc_div @0 @1) @1)))
+ (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+  (convert (trunc_mod @0 @1
  
  That looks awfully similar to a variant I also tried (but I remember
  having convert1? and convert2? in it).  Not sure what was wrong with
  that one; certainly yours seems to work fine.
 
 Afterwards I thought of a limitation. Nothing bad, but it highlights a trap I
 regularly fall into: several @0 in the same pattern may have different types
 (for INTEGER_CST, operand_equal_p mostly ignores the type). So for an int x,
 42L-42/x*x should fail to match, while using convert1? and convert2? should
 match.

Indeed that's a subtle issue with using operand_equal_p for matching
operands.  Note that 42L-42/x*x will appear as
42L-(long)(42/x*x) in the IL just in case that wasn't obvious.

Thus ok to adjust the pattern to convert1? / convert2? if you add such
a testcase (maybe also add the one that the variants you tried on
originally failed to match).

Richard.


Re: [PATCH] Discard Scops for which entry==exit

2015-06-30 Thread Richard Biener
On Tue, Jun 30, 2015 at 8:11 AM, Tobias Grosser tob...@grosser.es wrote:
 On 06/30/2015 02:09 AM, Sebastian Pop wrote:

 On Mon, Jun 29, 2015 at 3:04 PM, Aditya Kumar hiradi...@msn.com wrote:

 In this patch we discard the scops where entry and exit are the same BB.
 This is an effort to remove graphite-scop-detection.c:limit_scops.
 Removing the limit_scops function introduces correctness regressions.
 We are making relevant changes in incremental steps to fix those bugs,
 and finally we intend to remove limit_scops.

 2015-06-29  Aditya Kumar  aditya...@samsung.com
  Sebastian Pop s@samsung.com

  * graphite-scop-detection.c (build_scops_1): Discard scops for
 which entry==exit


 Looks good to me.
 Let's wait on comments from Tobi before pushing this patch.


 Hi Sebastian,

 the commit message should probably give a short reasoning why scops with
 entry == exit need to be discarded. I currently don't see why they would be
 incorrect/problematic (despite being possibly very small/empty).

Dependent on how GRAPHITE initializes loops even a loop can consist of a single
basic-block (without LOOPS_HAVE_SIMPLE_LATCHES where you always have at
least two BBs for a loop).

I suppose GRAPHITE does nothing for non-loops and this is what the check is
about?  (so rather require a backedge in the SCOP?)

Of course I also don't see correctness issues here, just maybe a waste of
compile-time?

Richard.

 Tobias


Re: [AArch64] Fall back to -fPIC if no support of -fpic relocation modifier in assembler

2015-06-30 Thread Jiong Wang

Andreas Schwab writes:

 Jiong Wang jiong.w...@arm.com writes:

 gcc/
   * configure.ac: Add check for aarch64 assembler -fpic relocation
   modifier support.
   * configure: Regenerate.
   * config.in: Regenerate.
   * config/aarch64/aarch64.c (initialize_aarch64_code_model): Fall back to
   -fPIC if not support of -fpic relocation modifier in assembler.

 This fixes all testsuite regressions except this one:

 gcc.target/aarch64/pic-small.c scan-assembler-times adrp\tx[0-9]+,
 _GLOBAL_OFFSET_TABLE 2

Thanks for testing.

We need another dejagnu check, something like
check_effective_target_fpic, to restrict this testcase only tested
when -fpic supported.

But the problem is the existing check_effective_target_fpic assumes if
-fpic not supported, then warning will be issued in compile stage, while
on AArch64, if -fpic is not supported, we will not issue warning, we
just fall back to -fPIC.

What I can think of to fix this is add a new check
check_effective_target_fpic_asm which check whether those new
relocation types are supported by binutils, I don't have any other
better idea, and this will be a separate patch.

So, is this patch OK for trunk?

-- 
Regards,
Jiong



Re: [PATCH] Graphite cannot handle return stmt

2015-06-30 Thread Richard Biener
On Tue, Jun 30, 2015 at 8:12 AM, Tobias Grosser tob...@grosser.es wrote:
 On 06/30/2015 02:12 AM, Sebastian Pop wrote:

 On Mon, Jun 29, 2015 at 3:58 PM, Aditya Kumar hiradi...@msn.com wrote:

 No regressions.

 2015-06-29  Aditya Kumar  aditya...@samsung.com
  Sebastian Pop s@samsung.com

  * graphite-scop-detection.c (stmt_simple_for_scop_p): Bail out
 in case of a return statement.


 Looks good to me.
 Tobi, do you see a good reason not to cut scops at return stmts?


 Return stmts in a SCoP are definitely invalid. Now, as in my last email, I
 wonder why this is not a positive list. There are probably a lot of gimple
 codes that are invalid inside scops. By default we should refuse everything
 we do _not_ know.

The function already does that.  It just accepted GIMPLE_RETURN as valid.

IMHO a

   default:
 gcc_unreachable ();

is the best style as it forces you to list everything explicitly.
The function should
be refactored to do all codes in the switch stmt (GIMPLE_ASMs are handled
in a if, so are GIMPLE_DEBUG for example).

Richard.

 Best,
 Tobias


[Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Ajit Kumar Agarwal
All:

The below patch added a new path Splitting optimization pass on SSA 
representation. The Path Splitting optimization
Pass moves the join block of if-then-else same as loop latch to its 
predecessors and get merged with the predecessors
Preserving the SSA representation.

The patch is tested for Microblaze and i386 target. The EEMBC/Mibench 
benchmarks is run with the Microblaze target
And the performance gain of 9.15% and rgbcmy01_lite(EEMBC benchmarks). The Deja 
GNU tests is run for Microblaze 
Target and no regression is seen for Microblaze target and the new testcase 
attached are passed.

For i386 bootstrapping goes through fine and the Spec cpu2000 benchmarks is run 
with this patch. Following observation
were seen with spec cpu2000 benchmarks. 

Ratio of path splitting change vs Ratio of not having path splitting change is 
3653.353 vs 3652.14 for INT benchmarks.
Ratio of path splitting change vs Ratio of not having path splitting change is  
4353.812 vs 4345.351 for FP benchmarks.

Based on comments from RFC patch following changes were done.

1. Added a new pass for path splitting changes.
2. Placed the new path  Splitting Optimization pass before the copy propagation 
pass.
3. The join block same as the Loop latch is wired into its predecessors so that 
the CFG Cleanup pass will merge the blocks
Wired together.
4. Copy propagation routines added for path splitting changes is not needed as 
suggested by Jeff. They are removed in the patch as
The copy propagation in the copied join blocks will be done by the existing 
copy propagation pass and the update ssa pass.
5. Only the propagation of phi results of the join block with the phi argument 
is done which will not be done by the existing update_ssa
Or copy propagation pass on tree ssa representation.
6. Added 2 tests.
a) compilation check  tests.
   b) execution tests.
7. Refactoring of the code for the feasibility check and finding the join block 
same as loop latch node.

[Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
representation.

Added a new pass on path splitting on tree SSA representation. The path
splitting optimization does the CFG transformation of join block of the
if-then-else same as the loop latch node is moved and merged with the
predecessor blocks after preserving the SSA representation.

ChangeLog:
2015-06-30  Ajit Agarwal  ajit...@xilinx.com

* gcc/Makefile.in: Add the build of the new file
tree-ssa-path-split.c
* gcc/common.opt: Add the new flag ftree-path-split.
* gcc/opts.c: Add an entry for Path splitting pass
with optimization flag greater and equal to O2.
* gcc/passes.def: Enable and add new pass path splitting.
* gcc/timevar.def: Add the new entry for TV_TREE_PATH_SPLIT.
* gcc/tree-pass.h: Extern Declaration of make_pass_path_split.
* gcc/tree-ssa-path-split.c: New file for path splitting pass.
* gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c: New testcase.
* gcc/testsuite/gcc.dg/path-split-1.c: New testcase.

Signed-off-by:Ajit Agarwal ajit...@xilinx.com.

gcc/Makefile.in  |   1 +
 gcc/common.opt   |   4 +
 gcc/opts.c   |   1 +
 gcc/passes.def   |   1 +
 gcc/testsuite/gcc.dg/path-split-1.c  |  65 
 gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c |  62 
 gcc/timevar.def  |   1 +
 gcc/tree-pass.h  |   1 +
 gcc/tree-ssa-path-split.c| 462 +++
 9 files changed, 598 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/path-split-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c
 create mode 100644 gcc/tree-ssa-path-split.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 5f9261f..35ac363 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1476,6 +1476,7 @@ OBJS = \
tree-vect-slp.o \
tree-vectorizer.o \
tree-vrp.o \
+tree-ssa-path-split.o \
tree.o \
valtrack.o \
value-prof.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index e104269..c63b100 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2328,6 +2328,10 @@ ftree-vrp
 Common Report Var(flag_tree_vrp) Init(0) Optimization
 Perform Value Range Propagation on trees
 
+ftree-path-split
+Common Report Var(flag_tree_path_split) Init(0) Optimization
+Perform Path Splitting
+
 funit-at-a-time
 Common Report Var(flag_unit_at_a_time) Init(1) Optimization
 Compile whole compilation unit at a time
diff --git a/gcc/opts.c b/gcc/opts.c
index 8a16116..31947ff 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -508,6 +508,7 @@ static const struct default_options default_options_table[] 
=
 { OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 },
 { OPT_LEVELS_2_PLUS, OPT_fipa_ra, NULL, 1 },
 

Re: [AArch64][TLSLE][3/N] Add UNSPEC_TLSLE

2015-06-30 Thread Christophe Lyon
Hi Jiong,

It seems to me that you committed patches 1/N, 2/N and 3/N as only 1
commit (rev 225121).

Am I right?


On 26 June 2015 at 16:39, Marcus Shawcroft marcus.shawcr...@gmail.com wrote:
 On 20 May 2015 at 12:21, Jiong Wang jiong.w...@arm.com wrote:

 Add new unspec name UNSPEC_TLSLE, use it for all tlsle pattern.

 ok for trunk?

 2015-05-19  Jiong Wang  jiong.w...@arm.com

 gcc/
   * config/aarch64/aarch64.md (UNSPEC_TLSLE): New enumeration.
   (tlsle): Use new unspec name.
   (tlsle_mode): Ditto.

 OK /Marcus


[PATCH] Improve VRP for sign-changed/widened compares

2015-06-30 Thread Richard Biener

This improves VRP for cases of

  x = (T) y;
  if (y !=/== CST)
{
  ... use of X

to insert asserts for X similar to how we handle x = y +- CST.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-06-30  Richard Biener  rguent...@suse.de

* tree-vrp.c (register_edge_assert_for_2): Also register
asserts for dominating conversion results.

Index: gcc/tree-vrp.c
===
*** gcc/tree-vrp.c  (revision 225115)
--- gcc/tree-vrp.c  (working copy)
*** register_edge_assert_for_2 (tree name, e
*** 5359,5365 
/* In the case of post-in/decrement tests like if (i++) ... and uses
   of the in/decremented value on the edge the extra name we want to
   assert for is not on the def chain of the name compared.  Instead
!  it is in the set of use stmts.  */
if ((comp_code == NE_EXPR
 || comp_code == EQ_EXPR)
 TREE_CODE (val) == INTEGER_CST)
--- 5359,5367 
/* In the case of post-in/decrement tests like if (i++) ... and uses
   of the in/decremented value on the edge the extra name we want to
   assert for is not on the def chain of the name compared.  Instead
!  it is in the set of use stmts.
!  Similar cases happen for conversions that were simplified through
!  fold_{sign_changed,widened}_comparison.  */
if ((comp_code == NE_EXPR
 || comp_code == EQ_EXPR)
 TREE_CODE (val) == INTEGER_CST)
*** register_edge_assert_for_2 (tree name, e
*** 5368,5396 
gimple use_stmt;
FOR_EACH_IMM_USE_STMT (use_stmt, ui, name)
{
- /* Cut off to use-stmts that are in the predecessor.  */
- if (gimple_bb (use_stmt) != e-src)
-   continue;
- 
  if (!is_gimple_assign (use_stmt))
continue;
  
! enum tree_code code = gimple_assign_rhs_code (use_stmt);
! if (code != PLUS_EXPR
!  code != MINUS_EXPR)
continue;
  
! tree cst = gimple_assign_rhs2 (use_stmt);
! if (TREE_CODE (cst) != INTEGER_CST)
continue;
  
! tree name2 = gimple_assign_lhs (use_stmt);
! if (live_on_edge (e, name2))
{
  cst = int_const_binop (code, val, cst);
- register_new_assert_for (name2, name2, comp_code, cst,
-  NULL, e, bsi);
}
}
  }
   
--- 5370,5406 
gimple use_stmt;
FOR_EACH_IMM_USE_STMT (use_stmt, ui, name)
{
  if (!is_gimple_assign (use_stmt))
continue;
  
! /* Cut off to use-stmts that are dominating the predecessor.  */
! if (!dominated_by_p (CDI_DOMINATORS, e-src, gimple_bb (use_stmt)))
continue;
  
! tree name2 = gimple_assign_lhs (use_stmt);
! if (TREE_CODE (name2) != SSA_NAME
! || !live_on_edge (e, name2))
continue;
  
! enum tree_code code = gimple_assign_rhs_code (use_stmt);
! tree cst;
! if (code == PLUS_EXPR
! || code == MINUS_EXPR)
{
+ cst = gimple_assign_rhs2 (use_stmt);
+ if (TREE_CODE (cst) != INTEGER_CST)
+   continue;
  cst = int_const_binop (code, val, cst);
}
+ else if (CONVERT_EXPR_CODE_P (code))
+   cst = fold_convert (TREE_TYPE (name2), val);
+ else
+   continue;
+ 
+ if (TREE_OVERFLOW_P (cst))
+   cst = drop_tree_overflow (cst);
+ register_new_assert_for (name2, name2, comp_code, cst,
+  NULL, e, bsi);
}
  }
   


Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Marc Glisse

On Tue, 30 Jun 2015, Marek Polacek wrote:


This moves a simple optimization.  Here it's plain to see how :c
removes the need to duplicate code to handle commutativity.


Note that the same transformation would work for plus and xor.


I put some more converts into the pattern, but then it's turned
out that I also need the tree_nop_conversion_p (otherwise we'd
regress binop-notor2.c that uses booleans).


I don't really see why removing tree_nop_conversion_p would regress 
anything (though you would probably need to build the all_ones constant in 
TREE_TYPE (@0) and convert that to type). For my curiosity, could you 
explain a bit more?


--
Marc Glisse


Re: [PATCH GCC][refacor]Manage allocation of struct iv in obstack.

2015-06-30 Thread Richard Biener
On Tue, Jun 30, 2015 at 4:31 AM, Bin.Cheng amker.ch...@gmail.com wrote:
 On Sat, Jun 27, 2015 at 5:13 AM, Jeff Law l...@redhat.com wrote:
 On 06/26/2015 03:02 AM, Bin Cheng wrote:

 Hi,
 GCC avoids multi-pointers/dangling-pointers of struct iv by allocating
 multiple copies of the structure.  This patch is an obvious fix to the
 issue
 by managing iv structures in obstack.

 Bootstrap on x86_64, will apply to trunk if no objection.

 Thanks,
 bin

 2015-06-26  Bin Cheng  bin.ch...@arm.com

 * tree-ssa-loop-ivopts.c (struct ivopts_data): New field
 iv_obstack.
 (tree_ssa_iv_optimize_init): Initialize iv_obstack.
 (alloc_iv): New parameter.  Allocate struct iv using
 obstack_alloc.
 (set_iv, find_interesting_uses_address, add_candidate_1): New
 argument.
 (find_interesting_uses_op): Don't duplicate struct iv.
 (free_loop_data): Don't free iv structure explicitly.
 (tree_ssa_iv_optimize_finalize): Free iv_obstack.

 Presumably you're trying to simplify the memory management  here so that you
 don't have to track lifetimes of the IV structures so carefully, which in
 turn simplifies some upcoming patch?
 Yes, that's exactly the reason.  I am still on the way fixing
 missed-optimizations in IVO, and plan to do some
 refactoring/simplification afterwards.

 Note we don't have a no objection policy for this kind of patch. However,
 I think it may make sense to look into having you as a maintainer for the IV
 optimizations if you're interested.
 Oh, that would be my great honor.

I'd support that.  Bin has done high quality work on IVOPTs in the past and he
knows when to ask questions (not that there ever were simple answers
to those...).

Thanks,
Richard.

 Thanks,
 bin

 Jeff



[PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Marek Polacek
This moves a simple optimization.  Here it's plain to see how :c
removes the need to duplicate code to handle commutativity.

I put some more converts into the pattern, but then it turned
out that I also need the tree_nop_conversion_p (otherwise we'd
regress binop-notor2.c that uses booleans).

I did a regtest with the patterns in fold-const.c removed to see
whether we have some testing for this folding -- and there were
no regressions, so I had to write a test.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-06-29  Marek Polacek  pola...@redhat.com

* fold-const.c (fold_binary_loc): Move ~X | X folding ...
* match.pd: ... here.

* gcc.dg/fold-ior-2.c: New test.

diff --git gcc/fold-const.c gcc/fold-const.c
index a447452..caba0cf 100644
--- gcc/fold-const.c
+++ gcc/fold-const.c
@@ -10928,24 +10928,6 @@ fold_binary_loc (location_t loc,
 
 case BIT_IOR_EXPR:
 bit_ior:
-  /* ~X | X is -1.  */
-  if (TREE_CODE (arg0) == BIT_NOT_EXPR
-  operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
-   {
- t1 = build_zero_cst (type);
- t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
- return omit_one_operand_loc (loc, type, t1, arg1);
-   }
-
-  /* X | ~X is -1.  */
-  if (TREE_CODE (arg1) == BIT_NOT_EXPR
-  operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
-   {
- t1 = build_zero_cst (type);
- t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
- return omit_one_operand_loc (loc, type, t1, arg0);
-   }
-
   /* Canonicalize (X  C1) | C2.  */
   if (TREE_CODE (arg0) == BIT_AND_EXPR
   TREE_CODE (arg1) == INTEGER_CST
diff --git gcc/match.pd gcc/match.pd
index 0cf3d21..5dcbc1a 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -283,6 +283,12 @@ along with GCC; see the file COPYING3.  If not see
   (bit_and @0 integer_zerop@1)
   @1)
 
+/* ~x | x - -1 */
+(simplify
+ (bit_ior:c (convert? @0) (convert? (bit_not @0)))
+ (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  { build_all_ones_cst (type); }))
+
 /* x ^ x - 0 */
 (simplify
   (bit_xor @0 @0)
diff --git gcc/testsuite/gcc.dg/fold-ior-2.c gcc/testsuite/gcc.dg/fold-ior-2.c
index e69de29..6abac9e 100644
--- gcc/testsuite/gcc.dg/fold-ior-2.c
+++ gcc/testsuite/gcc.dg/fold-ior-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-cddce1 } */
+
+int
+fn1 (int x)
+{
+  return ~x | x;
+}
+
+int
+fn2 (int x)
+{
+  return x | ~x;
+}
+
+unsigned int
+fn3 (unsigned int x)
+{
+  return ~x | x;
+}
+
+unsigned int
+fn4 (unsigned int x)
+{
+  return ~x | x;
+}
+
+int
+fn5 (int x)
+{
+  return ~x | (unsigned) x;
+}
+
+int
+fn6 (int x)
+{
+  return (unsigned) ~x | x;
+}
+
+int
+fn7 (int x)
+{
+  return ~(unsigned) x | x;
+}
+
+/* { dg-final { scan-tree-dump-not ~ cddce1 } } */
+/* { dg-final { scan-tree-dump-not  \\|  cddce1 } } */

Marek


Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 This moves a simple optimization.  Here it's plain to see how :c
 removes the need to duplicate code to handle commutativity.
 
 I put some more converts into the pattern, but then it's turned
 out that I also need the tree_nop_conversion_p (otherwise we'd
 regress binop-notor2.c that uses booleans).
 
 I did a regtest with the patterns in fold-const.c removed to see
 whether we have some testing for this folding -- and there were
 no regressions, so I had to write a test.
 
 Bootstrapped/regtested on x86_64-linux, ok for trunk?

Ok.

Thanks,
Richard.

 2015-06-29  Marek Polacek  pola...@redhat.com
 
   * fold-const.c (fold_binary_loc): Move ~X | X folding ...
   * match.pd: ... here.
 
   * gcc.dg/fold-ior-2.c: New test.
 
 diff --git gcc/fold-const.c gcc/fold-const.c
 index a447452..caba0cf 100644
 --- gcc/fold-const.c
 +++ gcc/fold-const.c
 @@ -10928,24 +10928,6 @@ fold_binary_loc (location_t loc,
  
  case BIT_IOR_EXPR:
  bit_ior:
 -  /* ~X | X is -1.  */
 -  if (TREE_CODE (arg0) == BIT_NOT_EXPR
 -operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
 - {
 -   t1 = build_zero_cst (type);
 -   t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
 -   return omit_one_operand_loc (loc, type, t1, arg1);
 - }
 -
 -  /* X | ~X is -1.  */
 -  if (TREE_CODE (arg1) == BIT_NOT_EXPR
 -operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
 - {
 -   t1 = build_zero_cst (type);
 -   t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
 -   return omit_one_operand_loc (loc, type, t1, arg0);
 - }
 -
/* Canonicalize (X  C1) | C2.  */
if (TREE_CODE (arg0) == BIT_AND_EXPR
  TREE_CODE (arg1) == INTEGER_CST
 diff --git gcc/match.pd gcc/match.pd
 index 0cf3d21..5dcbc1a 100644
 --- gcc/match.pd
 +++ gcc/match.pd
 @@ -283,6 +283,12 @@ along with GCC; see the file COPYING3.  If not see
(bit_and @0 integer_zerop@1)
@1)
  
 +/* ~x | x - -1 */
 +(simplify
 + (bit_ior:c (convert? @0) (convert? (bit_not @0)))
 + (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
 +  { build_all_ones_cst (type); }))
 +
  /* x ^ x - 0 */
  (simplify
(bit_xor @0 @0)
 diff --git gcc/testsuite/gcc.dg/fold-ior-2.c gcc/testsuite/gcc.dg/fold-ior-2.c
 index e69de29..6abac9e 100644
 --- gcc/testsuite/gcc.dg/fold-ior-2.c
 +++ gcc/testsuite/gcc.dg/fold-ior-2.c
 @@ -0,0 +1,47 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-cddce1 } */
 +
 +int
 +fn1 (int x)
 +{
 +  return ~x | x;
 +}
 +
 +int
 +fn2 (int x)
 +{
 +  return x | ~x;
 +}
 +
 +unsigned int
 +fn3 (unsigned int x)
 +{
 +  return ~x | x;
 +}
 +
 +unsigned int
 +fn4 (unsigned int x)
 +{
 +  return ~x | x;
 +}
 +
 +int
 +fn5 (int x)
 +{
 +  return ~x | (unsigned) x;
 +}
 +
 +int
 +fn6 (int x)
 +{
 +  return (unsigned) ~x | x;
 +}
 +
 +int
 +fn7 (int x)
 +{
 +  return ~(unsigned) x | x;
 +}
 +
 +/* { dg-final { scan-tree-dump-not ~ cddce1 } } */
 +/* { dg-final { scan-tree-dump-not  \\|  cddce1 } } */
 
   Marek
 
 

-- 
Richard Biener rguent...@suse.de
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
Norton, HRB 21284 (AG Nuernberg)


[patch committed SH] Fix PR target/64833

2015-06-30 Thread Kaz Kojima
The attached patch is to fix PR target/64833 which is a 4.9/5/6
regression.  The target reorg may replace casesi_worker_1 insn
with casesi_worker_2 with a temporary wrong length for PIC.
It can result in the wrong position of the constant pool.  See PR
target/64833 for details.
The patch sets the length of casesi_worker_1 to 8 for PIC so as to
avoid that misplacement of the constant pool.  It's tested on
sh4-unknown-linux-gnu.  Committed on trunk.  I'll backport it
to 5 and 4.9.

Regards,
kaz
--
2015-06-30  Kaz Kojima  kkoj...@gcc.gnu.org

PR target/64833
* config/sh/sh.md (casesi_worker_1): Set length to 8 when
flag_pic is set.

diff --git a/config/sh/sh.md b/config/sh/sh.md
index 35113c0..5c8d306 100644
--- a/config/sh/sh.md
+++ b/config/sh/sh.md
@@ -11344,6 +11344,8 @@ label:
 LABEL_NUSES (operands[2])++;
 })
 
+;; This may be replaced with casesi_worker_2 in sh_reorg for PIC.
+;; The insn length is set to 8 for that case.
 (define_insn casesi_worker_1
   [(set (match_operand:SI 0 register_operand =r,r)
(unspec:SI [(reg:SI R0_REG)
@@ -11375,7 +11377,9 @@ label:
   gcc_unreachable ();
 }
 }
-  [(set_attr length 4)])
+  [(set_attr_alternative length
+ [(if_then_else (match_test flag_pic) (const_int 8) (const_int 4))
+  (if_then_else (match_test flag_pic) (const_int 8) (const_int 4))])])
 
 (define_insn casesi_worker_2
   [(set (match_operand:SI 0 register_operand =r,r)


Re: [PATCH] config/bfin/bfin.c (hwloop_optimize): Use return false instead of gcc_assert for checking jump_insn.

2015-06-30 Thread Chen Gang
On 06/30/2015 03:46 AM, Bernd Schmidt wrote:
 On 06/28/2015 04:15 PM, Chen Gang wrote:
 For bfin looping optimization, after lsetup optimization, it can have
 the correct lsetup related insns which causes gcc_assert for jump_insn.
 
 I've been debugging this for a bit, and at least the explanation of the
 patch is wrong - it's finding an LSETUP for a different loop. There
 seems to be an inconsistency in the CFG, and it looks like it's caused
 by the unusual (?) situation that both arms out of a conditional branch
 lead directly to a hwloop candidate.


For me, the more details are:

 - The insns have 2 loops which can be lsetup optimized.

 - After hwloop_optimize finishes 1st lsetup optimization, it generates
   new lsetup insn which appends to jump insn in the basic block (which
   causes the insns are not 'standard' but OK for code generation).

 - In 2nd lsetup optimization, hwloop_optimize found the insns are not
   'standard' (they are generated by hwloop_optimize itself in 1st
   lsetup optimization), so gcc_assert().

So hwloop_optimize can give up lsetup optimization for the 'unstandard'
insns; for the present, everything will then be OK.

If we want a perfect solution, we can let hwloop_optimize process the
'unstandard' insns for lsetup optimization. But excuse me, I am not
very familiar with gcc internals or bfin (so I am not well suited for it).


 So, not OK until further investigation I think.
 

If necessary (what I said above is incorrect), I shall continue to
analyse.

Thanks.
-- 
Chen Gang

Open, share, and attitude like air, water, and life which God blessed


RE: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Ajit Kumar Agarwal
I forgot to attach the link to the RFC comments from Jeff for reference.

https://gcc.gnu.org/ml/gcc/2015-05/msg00302.html

Thanks & Regards
Ajit

-Original Message-
From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-ow...@gcc.gnu.org] On 
Behalf Of Ajit Kumar Agarwal
Sent: Tuesday, June 30, 2015 1:46 PM
To: l...@redhat.com; GCC Patches
Cc: Vinod Kathail; Shail Aditya Gupta; Vidhumouli Hunsigida; Nagaraju Mekala
Subject: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
representation

All:

The patch below adds a new path splitting optimization pass on the SSA
representation. The path splitting optimization pass moves the join block of
an if-then-else, when it is the same as the loop latch, to its predecessors,
where it gets merged with the predecessors while preserving the SSA representation.

The patch is tested for the Microblaze and i386 targets. The EEMBC/Mibench
benchmarks were run with the Microblaze target, showing a performance gain of
9.15% for rgbcmy01_lite (EEMBC benchmarks). The DejaGNU tests were run for the
Microblaze target; no regressions are seen and the new testcases
attached pass.

For i386 bootstrapping goes through fine and the Spec cpu2000 benchmarks is run 
with this patch. Following observation were seen with spec cpu2000 benchmarks. 

Ratio of path splitting change vs Ratio of not having path splitting change is 
3653.353 vs 3652.14 for INT benchmarks.
Ratio of path splitting change vs Ratio of not having path splitting change is  
4353.812 vs 4345.351 for FP benchmarks.

Based on comments from RFC patch following changes were done.

1. Added a new pass for path splitting changes.
2. Placed the new path  Splitting Optimization pass before the copy propagation 
pass.
3. The join block same as the Loop latch is wired into its predecessors so that 
the CFG Cleanup pass will merge the blocks Wired together.
4. Copy propagation routines added for path splitting changes is not needed as 
suggested by Jeff. They are removed in the patch as The copy propagation in the 
copied join blocks will be done by the existing copy propagation pass and the 
update ssa pass.
5. Only the propagation of phi results of the join block with the phi argument 
is done which will not be done by the existing update_ssa Or copy propagation 
pass on tree ssa representation.
6. Added 2 tests.
a) compilation check  tests.
   b) execution tests.
7. Refactoring of the code for the feasibility check and finding the join block 
same as loop latch node.

[Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
representation.

Added a new pass on path splitting on tree SSA representation. The path
splitting optimization does the CFG transformation of join block of the
if-then-else same as the loop latch node is moved and merged with the
predecessor blocks after preserving the SSA representation.

ChangeLog:
2015-06-30  Ajit Agarwal  ajit...@xilinx.com

* gcc/Makefile.in: Add the build of the new file
tree-ssa-path-split.c
* gcc/common.opt: Add the new flag ftree-path-split.
* gcc/opts.c: Add an entry for Path splitting pass
with optimization flag greater and equal to O2.
* gcc/passes.def: Enable and add new pass path splitting.
* gcc/timevar.def: Add the new entry for TV_TREE_PATH_SPLIT.
* gcc/tree-pass.h: Extern Declaration of make_pass_path_split.
* gcc/tree-ssa-path-split.c: New file for path splitting pass.
* gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c: New testcase.
* gcc/testsuite/gcc.dg/path-split-1.c: New testcase.

Signed-off-by:Ajit Agarwal ajit...@xilinx.com.

gcc/Makefile.in  |   1 +
 gcc/common.opt   |   4 +
 gcc/opts.c   |   1 +
 gcc/passes.def   |   1 +
 gcc/testsuite/gcc.dg/path-split-1.c  |  65 
 gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c |  62 
 gcc/timevar.def  |   1 +
 gcc/tree-pass.h  |   1 +
 gcc/tree-ssa-path-split.c| 462 +++
 9 files changed, 598 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/path-split-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c
 create mode 100644 gcc/tree-ssa-path-split.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5f9261f..35ac363 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1476,6 +1476,7 @@ OBJS = \
tree-vect-slp.o \
tree-vectorizer.o \
tree-vrp.o \
+tree-ssa-path-split.o \
tree.o \
valtrack.o \
value-prof.o \
diff --git a/gcc/common.opt b/gcc/common.opt index e104269..c63b100 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2328,6 +2328,10 @@ ftree-vrp
 Common Report Var(flag_tree_vrp) Init(0) Optimization  Perform Value Range 
Propagation on trees
 
+ftree-path-split

Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Marek Polacek
On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
 On Tue, 30 Jun 2015, Marek Polacek wrote:
 
  On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
   Does my suggestion to build the all_ones constant in TREE_TYPE (@0) and
   convert that to type help for that?
  
  It appears to work, but it seems weird to me to create an integer constant
  in one type and then immediately cast it to another type.
 
 Yes.  Do you have a testcase now that fails using bools?

I don't have a testcase that fails with the pattern we currently have, i.e.
the one with tree_nop_conversion_p.

Marek


Re: [Patch, fortran] PR52846 - [F2008] Support submodules

2015-06-30 Thread Paul Richard Thomas
Dear All,

Please find attached the latest version of the submodule patch. You
might note that the number of the patch has been incremented by two.
The intermediate version went to Reinhold Bader and co for testing.
The main improvements in this version are:
(i) As reported by Reinhold, the interface declaration of the function
result in the abbreviated form of module procedure was not being
correctly incorporated. The only reason why the previous version
worked at all was that I wasn't using implicit none... or, rather,  I
did not detect the error for that reason ***blush*** This has been
fixed in parse.c(get_modproc_result);
(ii) Repetition of the dummy or result declarations is now caught and
prevents a segfault as the parser state stack dies; and
(iii) Various new errors are tested in submodule_4.f90 and Reinhold's
test has been added as submodule_6.f90.

Please note that the issue with PRIVATE statements or attributes in
modules has not been corrected yet. I will correct this once the
attached has been committed. Private components are handled correctly,
however.

Bootstrapped and regtested on FC21/x86_64 - OK for trunk?

If the patch is not reviewed by Thursday evening (21:00 CET), I will
commit it anyway unless Reinhold or Salvatore come up with any
further, significant issues. The new elements in the patch are well
ring-fenced by new attributes or F2008 specific declarations and so I
do not believe that it will cause any regressions. Any minor issues
that come up can be dealt with later. I am anxious to get to work on
the PRIVATE issue and want to do so on the basis of this patch being
committed.

With best regards

Paul

2015-06-30  Paul Thomas  pa...@gcc.gnu.org

PR fortran/52846
* decl.c (get_proc_name): Make a partially populated interface
symbol to carry the characteristics of a module procedure and
its result.
(variable_decl): Declarations of dummies or results in the
abbreviated form of module procedure is an error.
(gfc_match_import): IMPORT is not permitted in the interface
declaration of module procedures.
(match_attr_spec): Submodule variables have implicit save
attribute for F2008 onwards.
(gfc_match_prefix): Add 'module' as a prefix and set the
module_procedure attribute.
(gfc_match_formal_arglist): For a module procedure keep the
interface formal_arglist from the interface, match the new
formal arguments and then compare the number and names of each.
(gfc_match_procedure): Add case COMP_SUBMODULE.
(gfc_match_function_decl, gfc_match_subroutine_decl): Set the
module_procedure attribute.
(gfc_match_entry, gfc_match_end):  Add case COMP_SUBMODULE. If
attr abr_modproc_decl is set, switch the message accordingly
for subroutines and functions.
(gfc_match_submod_proc): New function to match the abbreviated
style of submodule declaration.
* gfortran.h : Add ST_SUBMODULE and ST_END_SUBMODULE. Add the
attribute bits 'used_in_submodule' and 'module_procedure'. Add
the bit field 'abr_modproc_decl' to gfc_symbol. Add prototypes
for 'gfc_copy_dummy_sym', 'gfc_check_dummy_characteristics' and
'gfc_check_result_characteristics'.
* interface.c : Add the prefix 'gfc_' to the names of functions
'check_dummy(result)_characteristics' and all their references.
* match.h : Add prototype for 'gfc_match_submod_proc' and
'gfc_match_submodule'.
* module.c (gfc_match_submodule): New function. Add handling
for the 'module_procedure' attribute bit.
* parse.c (decode_statement): Set attr has_'import_set' for
the interface declaration of module procedures. Handle a match
occurring in 'gfc_match_submod_proc' and a match for
'submodule'.
(gfc_enclosing_unit): Include the state COMP_SUBMODULE.
(gfc_ascii_statement): Add END SUBMODULE.
(accept_statement): Add ST_SUBMODULE.
(parse_spec): Disallow statement functions in a submodule
specification part.
(parse_contained): Add ST_END_SUBMODULE and COMP_SUBMODULE
twice each.
(get_modproc_result): Copy the result symbol of the interface.
(parse_progunit): Call it.
(set_syms_host_assoc): Make symbols from the ancestor module
and submodules use associated, as required by the standard and
set all private components public. Module procedures 'external'
attribute bit is reset and the 'used_in_submodule' bit is set.
(parse_module): If this is a submodule, use the ancestor module
and submodules. Traverse the namespace, calling
'set_syms_host_assoc'. Add ST_END_SUBMODULE and COMP_SUBMODULE.
* parse.h : Add COMP_SUBMODULE.
* primary.c (match_variable): Add COMP_SUBMODULE.
* resolve.c (compare_fsyms): New function to compare the dummy
characteristics of a module procedure with its interface.
(resolve_fl_procedure): Compare the procedure, result and dummy
characteristics of a module_procedure with its interface, using
'compare_fsyms' for the 

Re: [PATCH 2/2] Add leon3r0 and leon3r0v7 CPU targets

2015-06-30 Thread Daniel Cederman

On 2015-06-30 11:24, Eric Botcazou wrote:

The UT699 is a leon3r0 system which does not support CASA. However, to
enable the errata fixes for UT699 with -mfix-ut699 requires the CPU
target to be leon3.


-mfix-ut699 itself is independent of the processor and doesn't require leon3.


The instruction timing also differs between leon and leon3 and they are
represented by different targets in binutils.


Yes, there is only one optimization trick for the scheduler that requires
leon3, but it doesn't affect correctness.  We could easily change that, i.e.
enable the trick for leon too if -mfix-ut699 is passed.



Thank you for the patch in your other mail that changes this!

We were also thinking of the instruction timing information found in the 
leon_costs and leon3_costs. We took a look at the values in leon_costs 
and they seem to fit well with the UT699, except for division. We got a 
bit unsure as to what leon system they are based on, as the division 
cost was wrong also for the AT697F, which is the most common leon2 
system. Would it be ok to update the division cost values of leon_costs 
so that they match UT699 and AT697F?


In general, depending on how one instantiates a leon system and which FPU 
is selected, you will get different timing. Is there a recommended way 
of adding support for this without adding additional CPU targets?
We are considering adding support for GRFPU-lite, which only differs in 
the timing.



As for binutils, they don't even know about leon3, everything is leon for them.



Yes, that was a misunderstanding on my part.

--
Daniel Cederman


Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Marek Polacek
On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
 Does my suggestion to build the all_ones constant in TREE_TYPE (@0) and
 convert that to type help for that?

It appears to work, but it seems weird to me to create an integer constant
in one type and then immediately cast it to another type.

Marek


Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
  Does my suggestion to build the all_ones constant in TREE_TYPE (@0) and
  convert that to type help for that?
 
 It appears to work, but it seems weird to me to create an integer constant
 in one type and then immediately cast it to another type.

Yes.  Do you have a testcase now that fails using bools?

   Marek
 
 

-- 
Richard Biener rguent...@suse.de
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
Norton, HRB 21284 (AG Nuernberg)


Re: [PATCH] top-level for libvtv: use normal (not raw_cxx) target exports

2015-06-30 Thread Jonathan Wakely

On 29/06/15 18:57 +0200, Michael Haubenwallner wrote:

Actually, an unexpected libstdc++-v3/configure change is introduced by some
inconsistency in https://gcc.gnu.org/viewcvs/gcc?view=revisionrevision=223196
where acinclude.m4 contains different code than the recreated configure:

acinclude.m4-3973: case ${target_os} in
acinclude.m4#3974:   gnu* | linux* | solaris*)
acinclude.m4-3975: GCC_TRY_COMPILE_OR_LINK(

 configure-79218:   case ${target_os} in
 configure#79219:   gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu | solaris*)
 configure-79220: if test x$gcc_no_link = xyes; then

Not sure how to handle such kind of inconsistencies though...


That's just a bug, there should be no inconsistency.

Now fixed by this patch.

commit bba7f350afcdf6bda0fa88bd421c46e1943db16f
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Jun 30 13:52:50 2015 +0100

	* configure: Regenerate.

diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 3654b68..1b99c06 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -79216,7 +79216,7 @@ $as_echo_n checking for sendfile that can copy files...  6; }
   $as_echo_n (cached)  6
 else
   case ${target_os} in
-  gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu | solaris*)
+  gnu* | linux* | solaris*)
 if test x$gcc_no_link = xyes; then
   cat confdefs.h - _ACEOF conftest.$ac_ext
 /* end confdefs.h.  */


Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2015-06-30 Thread Jakub Jelinek
On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
 --- a/libgomp/target.c
 +++ b/libgomp/target.c
 @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
 do_copyfrom)
bool do_unmap = false;
if (k-refcount  1)
   k-refcount--;
 -  else if (k-async_refcount  0)
 - k-async_refcount--;
 -  else
 - do_unmap = true;
 +  else if (k-refcount == 1)
 + {
 +   if (k-async_refcount  0)
 + k-async_refcount--;
 +   else
 + {
 +   k-refcount--;
 +   do_unmap = true;
 + }
 + }

What is the rationale of this hunk change?
BTW, we'll likely need to treat also refcount == INT_MAX as special (never
decrease it), because I believe declare target vars are supposed to have
refcount of infinity rather than just 2GB-1.

 @@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t 
 mapnum, void **hostaddrs,
  }
  
if (is_enter_data)
 -{
 -  /* TODO  */
 -}
 +gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 
 false);

This will leak the return value.  Either we need to arrange not to allocate
it for enter data, or we need to assign it to some variable and free
immediately (we don't want to perform the release operations for it).

else
 -{
 -  /* TODO  */
 -}
 +for (i = 0; i  mapnum; i++)
 +  {
 + struct splay_tree_key_s cur_node;
 + unsigned char kind = kinds[i]  typemask;
 + switch (kind)
 +   {
 +   case GOMP_MAP_FROM:
 +   case GOMP_MAP_ALWAYS_FROM:
 +   case GOMP_MAP_DELETE:
 +   case GOMP_MAP_RELEASE:
 + cur_node.host_start = (uintptr_t) hostaddrs[i];
 + cur_node.host_end = cur_node.host_start + sizes[i];
 + gomp_mutex_lock (devicep-lock);

I don't really like locking the mutex for each map clause in exit data
separately.  Perhaps just add a gomp_exit_data function similar to
gomp_map_vars that will run this loop and be surrounded by the locking,
or do it inline, but with the lock/unlock around the whole loop.
exit data construct must have at least one map clause, so it doesn't make
sense not to lock immediately.

 + splay_tree_key k = splay_tree_lookup (devicep-mem_map, cur_node);
 + if (!k)
 +   {
 + gomp_mutex_unlock (devicep-lock);
 + continue;
 +   }
 +
 + if (k-refcount  0)
 +   k-refcount--;
 + if (kind == GOMP_MAP_DELETE)
 +   k-refcount = 0;

See above, I believe delete should not delete refcount == INT_MAX
mappings.

Jakub


[PING][patch] Run testsuite/libgomp.c++/c++.exp at -O2 by default

2015-06-30 Thread Tom de Vries

On 15/06/15 18:36, Tom de Vries wrote:

Hi,

this patch:
- sets DEFAULT_CFLAGS to -O2, if not set otherwise
   (similar to what is done in c.exp)
- removes superfluous dg-options -O2 settings.
- removes superfluous dg-options -fopenmp settings.
- uses dg-additional-options for -std=standard settings

Tested on x86_64.



Ping. Original posting at 
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01026.html.



OK for trunk?


Thanks,
- Tom



[PATCH] Cut down match-and-simplify TDF_DETAILS noise

2015-06-30 Thread Richard Biener

This short-cuts re-simplifying (convert ...)s, which are often no-ops
because a conditional convert didn't match.  So instead of throwing
the whole match-and-simplify machinery at such converts and yelling
out that match.pd:961 triggered (that really happens often...), the
following simply does nothing for matching types.

Code-generation changes like

--- gimple-match.c.orig 2015-06-30 13:32:17.218895640 +0200
+++ gimple-match.c  2015-06-30 13:36:23.681291002 +0200
@@ -2771,21 +2771,33 @@
 {
   tree ops2[1], res;
   ops2[0] = captures[2];
+  if (utype != TREE_TYPE (ops2[0])
+   !useless_type_conversion_p (utype, TREE_TYPE (ops2[0])))
+  {
   code_helper tem_code = NOP_EXPR;
   tree tem_ops[3] = { ops2[0] };
   gimple_resimplify1 (seq, tem_code, utype, tem_ops, valueize);
   res = maybe_push_res_to_seq (tem_code, utype, tem_ops, seq);
   if (!res) return false;
+  }
+  else
+res = ops2[0];
   ops1[0] = res;
 }

for GIMPLE and the following for GENERIC

--- generic-match.c.orig2015-06-30 13:36:20.158257155 +0200
+++ generic-match.c 2015-06-30 13:36:23.707291254 +0200
@@ -2348,13 +2348,19 @@
 {
   tree ops2[1], res;
   ops2[0] = captures[2];
+  if (TREE_TYPE (ops2[0]) != utype)
   res = fold_build1_loc (loc, NOP_EXPR, utype, ops2[0]);
+  else
+res = ops2[0];
   ops1[0] = res;
 }

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-06-30  Richard Biener rguent...@suse.de

* genmatch.c (expr::gen_transform): Shortcut re-simplifying
of converts to avoid uninteresting noise from the conversion
simplifying patterns.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 225163)
+++ gcc/genmatch.c  (working copy)
@@ -1740,6 +1740,10 @@ expr::gen_transform (FILE *f, const char
 
   if (gimple)
 {
+  if (*operation == CONVERT_EXPR)
+   fprintf (f,   if (%s != TREE_TYPE (ops%d[0])\n
+  !useless_type_conversion_p (%s, TREE_TYPE (ops%d[0])))\n
+ {\n, type, depth, type, depth);
   /* ???  Building a stmt can fail for various reasons here, seq being
  NULL or the stmt referencing SSA names occuring in abnormal PHIs.
 So if we fail here we should continue matching other patterns.  */
@@ -1752,9 +1756,15 @@ expr::gen_transform (FILE *f, const char
   ops.length (), type);
   fprintf (f,   res = maybe_push_res_to_seq (tem_code, %s, tem_ops, 
seq);\n
 if (!res) return false;\n, type);
+  if (*operation == CONVERT_EXPR)
+fprintf (f,   }\n
+  else\n
+res = ops%d[0];\n, depth);
 }
   else
 {
+  if (*operation == CONVERT_EXPR)
+   fprintf (f,   if (TREE_TYPE (ops%d[0]) != %s)\n, depth, type);
   if (operation-kind == id_base::CODE)
fprintf (f,   res = fold_build%d_loc (loc, %s, %s,
 ops.length(), opr, type);
@@ -1764,6 +1774,9 @@ expr::gen_transform (FILE *f, const char
   for (unsigned i = 0; i  ops.length (); ++i)
fprintf (f, , ops%d[%u], depth, i);
   fprintf (f, );\n);
+  if (*operation == CONVERT_EXPR)
+   fprintf (f,   else\n
+res = ops%d[0];\n, depth);
 }
   fprintf (f, %s = res;\n, dest);
   fprintf (f, }\n);


RE: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Ajit Kumar Agarwal


-Original Message-
From: Richard Biener [mailto:richard.guent...@gmail.com] 
Sent: Tuesday, June 30, 2015 4:42 PM
To: Ajit Kumar Agarwal
Cc: l...@redhat.com; GCC Patches; Vinod Kathail; Shail Aditya Gupta; Vidhumouli 
Hunsigida; Nagaraju Mekala
Subject: Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
representation

On Tue, Jun 30, 2015 at 10:16 AM, Ajit Kumar Agarwal 
ajit.kumar.agar...@xilinx.com wrote:
 All:

 The below patch added a new path Splitting optimization pass on SSA 
 representation. The Path Splitting optimization Pass moves the join 
 block of if-then-else same as loop latch to its predecessors and get merged 
 with the predecessors Preserving the SSA representation.

 The patch is tested for Microblaze and i386 target. The EEMBC/Mibench 
 benchmarks is run with the Microblaze target And the performance gain 
 of 9.15% and rgbcmy01_lite(EEMBC benchmarks). The Deja GNU tests is run for 
 Microblaze Target and no regression is seen for Microblaze target and the 
 new testcase attached are passed.

 For i386 bootstrapping goes through fine and the Spec cpu2000 
 benchmarks is run with this patch. Following observation were seen with spec 
 cpu2000 benchmarks.

 Ratio of path splitting change vs Ratio of not having path splitting change 
 is 3653.353 vs 3652.14 for INT benchmarks.
 Ratio of path splitting change vs Ratio of not having path splitting change 
 is  4353.812 vs 4345.351 for FP benchmarks.

 Based on comments from RFC patch following changes were done.

 1. Added a new pass for path splitting changes.
 2. Placed the new path  Splitting Optimization pass before the copy 
 propagation pass.
 3. The join block same as the Loop latch is wired into its 
 predecessors so that the CFG Cleanup pass will merge the blocks Wired 
 together.
 4. Copy propagation routines added for path splitting changes is not 
 needed as suggested by Jeff. They are removed in the patch as The copy 
 propagation in the copied join blocks will be done by the existing copy 
 propagation pass and the update ssa pass.
 5. Only the propagation of phi results of the join block with the phi 
 argument is done which will not be done by the existing update_ssa Or copy 
 propagation pass on tree ssa representation.
 6. Added 2 tests.
 a) compilation check  tests.
b) execution tests.
 7. Refactoring of the code for the feasibility check and finding the join 
 block same as loop latch node.

 [Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
 representation.

 Added a new pass on path splitting on tree SSA representation. The path
 splitting optimization does the CFG transformation of join block of the
 if-then-else same as the loop latch node is moved and merged with the
 predecessor blocks after preserving the SSA representation.

 ChangeLog:
 2015-06-30  Ajit Agarwal  ajit...@xilinx.com

 * gcc/Makefile.in: Add the build of the new file
 tree-ssa-path-split.c
 * gcc/common.opt: Add the new flag ftree-path-split.
 * gcc/opts.c: Add an entry for Path splitting pass
 with optimization flag greater and equal to O2.
 * gcc/passes.def: Enable and add new pass path splitting.
 * gcc/timevar.def: Add the new entry for TV_TREE_PATH_SPLIT.
 * gcc/tree-pass.h: Extern Declaration of make_pass_path_split.
 * gcc/tree-ssa-path-split.c: New file for path splitting pass.
 * gcc/testsuite/gcc.dg/tree-ssa/path-split-2.c: New testcase.
 * gcc/testsuite/gcc.dg/path-split-1.c: New testcase.

I'm not 100% sure I understand the transform but what I see from the 
testcases it tail-duplicates from a conditional up to a loop latch block (not 
sure if it includes it and thus ends up creating a loop nest or not).

The path splitting pass wires a duplicate of the loop latch block to each of 
its predecessor paths, if the loop latch block is the same as the join block. 
The CFG cleanup phase of the path splitting transformation then merges the 
duplicated blocks with the original predecessors. This leaves the loop latch 
block as just a forwarding block of the predecessors: its sequential 
statements are set to NULL, leaving only the phi nodes, and the same loop 
semantics with respect to the loop latch edge are preserved.
The SSA updates are also preserved.

Thanks & Regards
Ajit 

An observation I have is that the pass should at least share the transform 
stage to some extent with the existing tracer pass (tracer.c) which 
essentially does the same but not restricted to loops in any way.  So I 
wonder if your pass could be simply another heuristic to compute paths to 
trace in the existing tracer pass.


Thanks,
Richard.

 Signed-off-by:Ajit Agarwal ajit...@xilinx.com.

 gcc/Makefile.in  |   1 +
  gcc/common.opt   |   4 +
  gcc/opts.c   |   1 +
  

Re: [patch] Run testsuite/libgomp.c++/c++.exp at -O2 by default

2015-06-30 Thread Jakub Jelinek
On Mon, Jun 15, 2015 at 06:36:17PM +0200, Tom de Vries wrote:
 Hi,
 
 this patch:
 - sets DEFAULT_CFLAGS to -O2, if not set otherwise
   (similar to what is done in c.exp)
 - removes superfluous dg-options -O2 settings.
 - removes superfluous dg-options -fopenmp settings.
 - uses dg-additional-options for -std=standard settings
 
 Tested on x86_64.
 
 OK for trunk?

Ok, thanks.

Jakub


[gomp4.1] Support #pragma omp target {enter,exit} data

2015-06-30 Thread Ilya Verbin
Hi!

This patch implements GOMP_target_enter_exit_data in libgomp, also it fixes a
bug in gomp_map_vars_existing.
make check-target-libgomp passed.
However, I am afraid that there may be some hard-to-find issues (like memory
leaks) in cases of mixed (structured+unstructured) data mappings...
OK for gomp-4_1-branch?


libgomp/
* target.c (gomp_map_vars_existing): Fix target address for 'always to'
array sections.
(gomp_unmap_vars): Decrement k->refcount when it's 1 and
k->async_refcount is 0.
(GOMP_target_enter_exit_data): Add mapping/unmapping.
* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
sections.
* testsuite/libgomp.c/target-12.c: New test.


diff --git a/libgomp/target.c b/libgomp/target.c
index a394e95..83ca827 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -171,7 +171,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, 
splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
 devicep-host2dev_func (devicep-target_id,
-   (void *) (oldn-tgt-tgt_start + oldn-tgt_offset),
+   (void *) (oldn-tgt-tgt_start + oldn-tgt_offset
+ + newn-host_start - oldn-host_start),
(void *) newn-host_start,
newn-host_end - newn-host_start);
   oldn-refcount++;
@@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
do_copyfrom)
   bool do_unmap = false;
   if (k-refcount  1)
k-refcount--;
-  else if (k-async_refcount  0)
-   k-async_refcount--;
-  else
-   do_unmap = true;
+  else if (k-refcount == 1)
+   {
+ if (k-async_refcount  0)
+   k-async_refcount--;
+ else
+   {
+ k-refcount--;
+ do_unmap = true;
+   }
+   }
 
   if ((do_unmap  do_copyfrom  tgt-list[i].copy_from)
  || tgt-list[i].always_copy_from)
@@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, 
void **hostaddrs,
 }
 
   if (is_enter_data)
-{
-  /* TODO  */
-}
+gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 
false);
   else
-{
-  /* TODO  */
-}
+for (i = 0; i  mapnum; i++)
+  {
+   struct splay_tree_key_s cur_node;
+   unsigned char kind = kinds[i]  typemask;
+   switch (kind)
+ {
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+   cur_node.host_start = (uintptr_t) hostaddrs[i];
+   cur_node.host_end = cur_node.host_start + sizes[i];
+   gomp_mutex_lock (devicep-lock);
+   splay_tree_key k = splay_tree_lookup (devicep-mem_map, cur_node);
+   if (!k)
+ {
+   gomp_mutex_unlock (devicep-lock);
+   continue;
+ }
+
+   if (k-refcount  0)
+ k-refcount--;
+   if (kind == GOMP_MAP_DELETE)
+ k-refcount = 0;
+
+   if ((kind == GOMP_MAP_FROM  k-refcount == 0)
+   || kind == GOMP_MAP_ALWAYS_FROM)
+ devicep-dev2host_func (devicep-target_id,
+ (void *) cur_node.host_start,
+ (void *) (k-tgt-tgt_start
+   + k-tgt_offset
+   + cur_node.host_start
+   - k-host_start),
+ cur_node.host_end - cur_node.host_start);
+   if (k-refcount == 0)
+ {
+   splay_tree_remove (devicep-mem_map, k);
+   if (k-tgt-refcount  1)
+ k-tgt-refcount--;
+   else
+ gomp_unmap_tgt (k-tgt);
+ }
+
+   gomp_mutex_unlock (devicep-lock);
+   break;
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_TO_PSET:
+   break;
+ default:
+   gomp_fatal (GOMP_target_enter_exit_data unhandled kind 0x%.2x,
+   kind);
+ }
+  }
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c 
b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..98882f0 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@ void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
 {
+  int ok = 1;
+  for (int i = 10; i  10 + 4; i++)
+   p[i] = 997 * i;
+
+  #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+   for (int i = 10; i  10 + 4; i++)
+ if (p[i] != 997 * i)
+   ok = 0;
+
+  assert (ok);
+
   #pragma omp target map(always from:p[7:9])
for (int i = 0; i  N; i++)
  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-12.c 

[PATCH] Fix simdclone pass for addressable linear/uniform parameters (PR middle-end/66702)

2015-06-30 Thread Jakub Jelinek
Hi!

As the first testcase shows, we were mishandling addressable uniform/linear
parameters, in that case keeping them to use the (D) ssa name for uniform
or doing the iteration for linear doesn't work, so we need to handle it
slightly differently.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to
trunk/5.2.

2015-06-30  Jakub Jelinek  ja...@redhat.com

PR middle-end/66702
* omp-low.c (simd_clone_adjust): Handle addressable linear
or uniform parameters or non-gimple type uniform parameters.

* testsuite/libgomp.c++/pr66702-1.C: New test.
* testsuite/libgomp.c++/pr66702-2.C: New test.

--- gcc/omp-low.c.jj2015-06-17 20:11:10.0 +0200
+++ gcc/omp-low.c   2015-06-30 11:38:24.550092307 +0200
@@ -13427,12 +13427,54 @@ simd_clone_adjust (struct cgraph_node *n
  uniform args with __builtin_assume_aligned (arg_N(D), alignment)
  lhs.  Handle linear by adding PHIs.  */
   for (unsigned i = 0; i  node-simdclone-nargs; i++)
-if (node-simdclone-args[i].alignment
-node-simdclone-args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
-(node-simdclone-args[i].alignment
-(node-simdclone-args[i].alignment - 1)) == 0
-TREE_CODE (TREE_TYPE (node-simdclone-args[i].orig_arg))
-  == POINTER_TYPE)
+if (node-simdclone-args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
+(TREE_ADDRESSABLE (node-simdclone-args[i].orig_arg)
+   || !is_gimple_reg_type
+   (TREE_TYPE (node-simdclone-args[i].orig_arg
+  {
+   tree orig_arg = node-simdclone-args[i].orig_arg;
+   if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
+ iter1 = make_ssa_name (TREE_TYPE (orig_arg));
+   else
+ {
+   iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
+   gimple_add_tmp_var (iter1);
+ }
+   gsi = gsi_after_labels (entry_bb);
+   g = gimple_build_assign (iter1, orig_arg);
+   gsi_insert_before (gsi, g, GSI_NEW_STMT);
+   gsi = gsi_after_labels (body_bb);
+   g = gimple_build_assign (orig_arg, iter1);
+   gsi_insert_before (gsi, g, GSI_NEW_STMT);
+  }
+else if (node-simdclone-args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
+ DECL_BY_REFERENCE (node-simdclone-args[i].orig_arg)
+ TREE_CODE (TREE_TYPE (node-simdclone-args[i].orig_arg))
+   == REFERENCE_TYPE
+ TREE_ADDRESSABLE
+ (TREE_TYPE (TREE_TYPE (node-simdclone-args[i].orig_arg
+  {
+   tree orig_arg = node-simdclone-args[i].orig_arg;
+   tree def = ssa_default_def (cfun, orig_arg);
+   if (def  !has_zero_uses (def))
+ {
+   iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
+   gimple_add_tmp_var (iter1);
+   gsi = gsi_after_labels (entry_bb);
+   g = gimple_build_assign (iter1, build_simple_mem_ref (def));
+   gsi_insert_before (gsi, g, GSI_NEW_STMT);
+   gsi = gsi_after_labels (body_bb);
+   g = gimple_build_assign (build_simple_mem_ref (def), iter1);
+   gsi_insert_before (gsi, g, GSI_NEW_STMT);
+ }
+  }
+else if (node-simdclone-args[i].alignment
+ node-simdclone-args[i].arg_type
+   == SIMD_CLONE_ARG_TYPE_UNIFORM
+ (node-simdclone-args[i].alignment
+ (node-simdclone-args[i].alignment - 1)) == 0
+ TREE_CODE (TREE_TYPE (node-simdclone-args[i].orig_arg))
+   == POINTER_TYPE)
   {
unsigned int alignment = node-simdclone-args[i].alignment;
tree orig_arg = node-simdclone-args[i].orig_arg;
@@ -13482,13 +13524,31 @@ simd_clone_adjust (struct cgraph_node *n
 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
   {
tree orig_arg = node-simdclone-args[i].orig_arg;
-   tree def = ssa_default_def (cfun, orig_arg);
gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
|| POINTER_TYPE_P (TREE_TYPE (orig_arg)));
-   if (def  !has_zero_uses (def))
+   tree def = NULL_TREE;
+   if (TREE_ADDRESSABLE (orig_arg))
+ {
+   def = make_ssa_name (TREE_TYPE (orig_arg));
+   iter1 = make_ssa_name (TREE_TYPE (orig_arg));
+   iter2 = make_ssa_name (TREE_TYPE (orig_arg));
+   gsi = gsi_after_labels (entry_bb);
+   g = gimple_build_assign (def, orig_arg);
+   gsi_insert_before (gsi, g, GSI_NEW_STMT);
+ }
+   else
+ {
+   def = ssa_default_def (cfun, orig_arg);
+   if (!def || has_zero_uses (def))
+ def = NULL_TREE;
+   else
+ {
+   iter1 = make_ssa_name (orig_arg);
+   iter2 = make_ssa_name (orig_arg);
+ }
+ }
+   if (def)
  {
-   iter1 = make_ssa_name (orig_arg);
-   iter2 = make_ssa_name (orig_arg);
phi = create_phi_node (iter1, body_bb);

[PATCH] Fix PR66704

2015-06-30 Thread Richard Biener

Committed as obvious.

Richard.

2015-06-30  Richard Biener  rguent...@suse.de

PR tree-optimization/66704
* tree-vect-data-refs.c (vect_setup_realignment): Use
make_ssa_name for non-SSA name source.

Index: gcc/tree-vect-data-refs.c
===
--- gcc/tree-vect-data-refs.c   (revision 225163)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -4857,7 +4857,10 @@ vect_setup_realignment (gimple stmt, gim
   ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
  NULL_TREE, init_addr, NULL, inc,
  true, inv_p);
-  new_temp = copy_ssa_name (ptr);
+  if (TREE_CODE (ptr) == SSA_NAME)
+   new_temp = copy_ssa_name (ptr);
+  else
+   new_temp = make_ssa_name (TREE_TYPE (ptr));
   new_stmt = gimple_build_assign
   (new_temp, BIT_AND_EXPR, ptr,
build_int_cst (TREE_TYPE (ptr),


Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Marek Polacek
On Tue, Jun 30, 2015 at 11:08:35AM +0200, Marc Glisse wrote:
 On Tue, 30 Jun 2015, Marek Polacek wrote:
 
 This moves a simple optimization.  Here it's plain to see how :c
 removes the need to duplicate code to handle commutativity.
 
 Note that the same transformation would work for plus and xor.
 
Sounds like a good follow-up.  I think moving from fold-const.c to
match.pd ought to be 1:1 for clarity.  I'll prepare a patch to also
handle +/^.

 I put some more converts into the pattern, but then it's turned
 out that I also need the tree_nop_conversion_p (otherwise we'd
 regress binop-notor2.c that uses booleans).
 
 I don't really see why removing tree_nop_conversion_p would regress anything
 (though you would probably need to build the all_ones constant in TREE_TYPE
 (@0) and convert that to type). For my curiosity, could you explain a bit
 more?

This wasn't all that clear to me.  The testcase in question is 

int
foo (_Bool a, _Bool b)
{
  return (a | (a == 0)) | ((b ^ 1) | b); 
}

this ought to be simplified to return 1.  Through various folding we
arrive at

(int) ~b | (int) b

so we'd turn that into -1 (all_ones_cst of type int).  But for boolean b
~b | b is always 1, right?

Marek


Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Bernhard Reutner-Fischer
On June 30, 2015 10:46:21 AM GMT+02:00, Marek Polacek pola...@redhat.com 
wrote:

--- gcc/testsuite/gcc.dg/fold-ior-2.c
+++ gcc/testsuite/gcc.dg/fold-ior-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-cddce1 } */
+
+int
+fn1 (int x)
+{
+  return ~x | x;
+}
+
+int
+fn2 (int x)
+{
+  return x | ~x;
+}
+
+unsigned int
+fn3 (unsigned int x)
+{
+  return ~x | x;
+}
+
+unsigned int
+fn4 (unsigned int x)
+{
+  return ~x | x;
+}

What's the difference between fn3 and fn4?

Thanks,

+
+int
+fn5 (int x)
+{
+  return ~x | (unsigned) x;
+}
+
+int
+fn6 (int x)
+{
+  return (unsigned) ~x | x;
+}
+
+int
+fn7 (int x)
+{
+  return ~(unsigned) x | x;
+}
+
+/* { dg-final { scan-tree-dump-not ~ cddce1 } } */
+/* { dg-final { scan-tree-dump-not  \\|  cddce1 } } */

   Marek




Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Bernhard Reutner-Fischer
On June 30, 2015 10:16:01 AM GMT+02:00, Ajit Kumar Agarwal 
ajit.kumar.agar...@xilinx.com wrote:
All:

The below patch added a new path Splitting optimization pass on SSA
representation. The Path Splitting optimization
Pass moves the join block of if-then-else same as loop latch to its
predecessors and get merged with the predecessors
Preserving the SSA representation.

The patch is tested for Microblaze and i386 target. The EEMBC/Mibench
benchmarks is run with the Microblaze target
And the performance gain of 9.15% and rgbcmy01_lite(EEMBC benchmarks).
The Deja GNU tests is run for Microblaze 
Target and no regression is seen for Microblaze target and the new
testcase attached are passed.

For i386 bootstrapping goes through fine and the Spec cpu2000
benchmarks is run with this patch. Following observation
were seen with spec cpu2000 benchmarks. 

Ratio of path splitting change vs Ratio of not having path splitting
change is 3653.353 vs 3652.14 for INT benchmarks.
Ratio of path splitting change vs Ratio of not having path splitting
change is  4353.812 vs 4345.351 for FP benchmarks.

Based on comments from RFC patch following changes were done.

1. Added a new pass for path splitting changes.
2. Placed the new path  Splitting Optimization pass before the copy
propagation pass.
3. The join block same as the Loop latch is wired into its predecessors
so that the CFG Cleanup pass will merge the blocks
Wired together.
4. Copy propagation routines added for path splitting changes is not
needed as suggested by Jeff. They are removed in the patch as
The copy propagation in the copied join blocks will be done by the
existing copy propagation pass and the update ssa pass.
5. Only the propagation of phi results of the join block with the phi
argument is done which will not be done by the existing update_ssa
Or copy propagation pass on tree ssa representation.
6. Added 2 tests.
a) compilation check  tests.
   b) execution tests.

The 2 tests seem to be identical, so why do you have both?
Also, please remove cleanup-tree-dump, this is now done automatically.

Thanks,

7. Refactoring of the code for the feasibility check and finding the
join block same as loop latch node.




Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 11:08:35AM +0200, Marc Glisse wrote:
  On Tue, 30 Jun 2015, Marek Polacek wrote:
  
  This moves a simple optimization.  Here it's plain to see how :c
  removes the need to duplicate code to handle commutativity.
  
  Note that the same transformation would work for plus and xor.
  
 Sounds like a good follow-up.  I think moving from fold-const.c to
 match.pd ought to be 1:1 for clarity.

Yes, I agree fully here.

 I'll prepare a patch to also
 handle +/^.

Thanks.

  I put some more converts into the pattern, but then it's turned
  out that I also need the tree_nop_conversion_p (otherwise we'd
  regress binop-notor2.c that uses booleans).
  
  I don't really see why removing tree_nop_conversion_p would regress anything
  (though you would probably need to build the all_ones constant in TREE_TYPE
  (@0) and convert that to type). For my curiosity, could you explain a bit
  more?
 
 This wasn't all that clear to me.  The testcase in question is 
 
 int
 foo (_Bool a, _Bool b)
 {
   return (a | (a == 0)) | ((b ^ 1) | b); 
 }
 
 this ought to be simplified to return 1.  Through various folding we
 arrive at
 
 (int) ~b | (int) b
 
 so we'd turn that into -1 (all_ones_cst of type int).  But for boolean b
 ~b | b is always 1, right?

Actually our bools are signed ;)  Even for unsigned bools we'd
then simply build '1' via build_all_ones_cst.

Richard.


RE: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Ajit Kumar Agarwal


-Original Message-
From: Bernhard Reutner-Fischer [mailto:rep.dot@gmail.com] 
Sent: Tuesday, June 30, 2015 3:57 PM
To: Ajit Kumar Agarwal; l...@redhat.com; GCC Patches
Cc: Vinod Kathail; Shail Aditya Gupta; Vidhumouli Hunsigida; Nagaraju Mekala
Subject: Re: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa 
representation

On June 30, 2015 10:16:01 AM GMT+02:00, Ajit Kumar Agarwal 
ajit.kumar.agar...@xilinx.com wrote:
All:

The below patch added a new path Splitting optimization pass on SSA 
representation. The Path Splitting optimization Pass moves the join 
block of if-then-else same as loop latch to its predecessors and get 
merged with the predecessors Preserving the SSA representation.

The patch is tested for Microblaze and i386 target. The EEMBC/Mibench 
benchmarks is run with the Microblaze target And the performance gain 
of 9.15% and rgbcmy01_lite(EEMBC benchmarks).
The Deja GNU tests is run for Microblaze Target and no regression is 
seen for Microblaze target and the new testcase attached are passed.

For i386 bootstrapping goes through fine and the Spec cpu2000 
benchmarks is run with this patch. Following observation were seen with 
spec cpu2000 benchmarks.

Ratio of path splitting change vs Ratio of not having path splitting 
change is 3653.353 vs 3652.14 for INT benchmarks.
Ratio of path splitting change vs Ratio of not having path splitting 
change is  4353.812 vs 4345.351 for FP benchmarks.

Based on comments from RFC patch following changes were done.

1. Added a new pass for path splitting changes.
2. Placed the new path  Splitting Optimization pass before the copy 
propagation pass.
3. The join block same as the Loop latch is wired into its predecessors 
so that the CFG Cleanup pass will merge the blocks Wired together.
4. Copy propagation routines added for path splitting changes is not 
needed as suggested by Jeff. They are removed in the patch as The copy 
propagation in the copied join blocks will be done by the existing copy 
propagation pass and the update ssa pass.
5. Only the propagation of phi results of the join block with the phi 
argument is done which will not be done by the existing update_ssa Or 
copy propagation pass on tree ssa representation.
6. Added 2 tests.
a) compilation check  tests.
   b) execution tests.

The 2 tests seem to be identical, so why do you have both?
Also, please remove cleanup-tree-dump, this is now done automatically.

The testcase path-split-1.c, which is present in the top-level gcc.dg 
directory, is an execution test: its action is compile and run. The one 
present in gcc.dg/tree-ssa/path-split-2.c checks compilation only, as its 
action is compile.

Thanks & Regards
Ajit

Thanks,

7. Refactoring of the code for the feasibility check and finding the 
join block same as loop latch node.




Re: Four jit backports to gcc 5 branch

2015-06-30 Thread David Malcolm
On Tue, 2015-06-30 at 08:43 +0200, Basile Starynkevitch wrote:
 On Mon, Jun 29, 2015 at 11:44:31AM -0400, David Malcolm wrote:
  I've gone over the changes to the gcc/jit and gcc/testsuite/jit.dg
  directories in trunk since gcc 5 and backported the following 4 changes
  from trunk to the gcc-5-branch:
  
 
 
 I'll be delighted if switch statements ability would be backported to
 GCC 5.  (Its absence is IMHO a severe bug in GCCJIT, but perhaps GCC rules
 forbid backporting new features, even when they correct a huge
 deficiency and when the patch adding them is probably quite small).

I don't agree with your characterization of the severity of this, but I
do think we ought to fix it if possible.

I don't know of any rules about what is and what isn't suitable for
backporting.  The jit work as a whole is relatively new and has been
messaged as experimental.

I'm the jit maintainer, and I believe the patch is reasonable to
backport: it doesn't break API or ABI (although it extends it), it has
docs and test coverage, and doesn't touch anything outside of the jit
(beyond adding the new header gcc/typed-splay-tree.h, which is only used
by the jit).  

Hence I intend to backport it to gcc-5-branch, but I've got to get it
into trunk first :)  (hopefully later today)

Dave



[PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Dhole
Hi!

We are working in Debian — and I know other free software projects
care — on providing our users with a way to reproduce bit-for-bit
identical binary packages from the source and build environment.
See https://wiki.debian.org/ReproducibleBuilds/About for some
rationale and further explanations.

In order to do this, we need to make the build processes deterministic.
As you can imagine, gcc is quite involved in producing Debian packages.
One issue we encounter in many packages that fail to build reproducibly
is the use of the __DATE__, __TIME__ C macros [1], right now we have 456
affected packages that would need patching (either removing the macros,
or passing a known date externally).

A solution for toolchain packages that embed timestamps during the build
process has been proposed for anyone interested and it consists of the
following:
The build environment can export an environment variable called
SOURCE_DATE_EPOCH with a known timestamp in Unix epoch format (In our
case, we use the last date of the package's debian changelog). The
toolchain package running during the build can check if the exported
variable is set and if so, instead of embedding the local date/time,
embed the date/time from SOURCE_DATE_EPOCH.

It would be very beneficial to our project (and other free software
projects working on reproducible builds) if gcc supported this feature.
I'm attaching a patch for gcc-5.1.0 that enables this feature: it
modifies the behavior of the macros __DATE__ and __TIME__ when
SOURCE_DATE_EPOCH is exported.

What do you think? Any suggestions or other ideas that help in getting
reproducible builds are welcome.

I'm willing to extend the documentation if the patch feels appropriate.

Thanks for your attention!

[1] https://wiki.debian.org/ReproducibleBuilds/TimestampsFromCPPMacros

Best regards,
Dhole
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 1e0a0b5..a52e3cb 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -349,14 +349,38 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode 
*node)
 slow on some systems.  */
  time_t tt;
  struct tm *tb = NULL;
+ char *source_date_epoch;
 
- /* (time_t) -1 is a legitimate value for number of seconds
-since the Epoch, so we have to do a little dance to
-distinguish that from a genuine error.  */
- errno = 0;
- tt = time(NULL);
- if (tt != (time_t)-1 || errno == 0)
-   tb = localtime (tt);
+ /* Allow the date and time to be set externally by an exported
+environment variable to enable reproducible builds. */
+ source_date_epoch = getenv (SOURCE_DATE_EPOCH);
+ if (source_date_epoch)
+   {
+ errno = 0;
+ tt = (time_t) strtol (source_date_epoch, NULL, 10);
+ if (errno == 0)
+   {
+ tb = gmtime (tt);
+ if (tb == NULL)
+   cpp_error (pfile, CPP_DL_ERROR,
+  SOURCE_DATE_EPOCH=\%s\ is not a valid date,
+  source_date_epoch);
+   }
+ else
+   cpp_error (pfile, CPP_DL_ERROR,
+  SOURCE_DATE_EPOCH=\%s\ is not a valid number,
+  source_date_epoch);
+   }
+ else
+   {
+   /* (time_t) -1 is a legitimate value for number of seconds
+  since the Epoch, so we have to do a little dance to
+  distinguish that from a genuine error.  */
+   errno = 0;
+   tt = time(NULL);
+   if (tt != (time_t)-1 || errno == 0)
+ tb = localtime (tt);
+ }
 
  if (tb)
{


signature.asc
Description: OpenPGP digital signature


[PATCH][9/n] Remove GENERIC stmt combining from SCCVN

2015-06-30 Thread Richard Biener

This moves the rest of the patterns in match-bitwise.pd from the
match-and-simplify branch (those that make sense - I'll merge the
rest to the tail of match.pd on the branch).

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-06-30  Richard Biener  rguent...@suse.de

* fold-const.c (fold_binary_loc): Move ~X ^ X -> -1 and
(X & Y) ^ Y -> ~X & Y transforms to ...
* match.pd: ... here.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 225184)
+++ gcc/fold-const.c(working copy)
@@ -10979,24 +10925,6 @@ fold_binary_loc (location_t loc,
   goto bit_rotate;
 
 case BIT_XOR_EXPR:
-  /* ~X ^ X is -1.  */
-  if (TREE_CODE (arg0) == BIT_NOT_EXPR
-  operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
-   {
- t1 = build_zero_cst (type);
- t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
- return omit_one_operand_loc (loc, type, t1, arg1);
-   }
-
-  /* X ^ ~X is -1.  */
-  if (TREE_CODE (arg1) == BIT_NOT_EXPR
-  operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
-   {
- t1 = build_zero_cst (type);
- t1 = fold_unary_loc (loc, BIT_NOT_EXPR, type, t1);
- return omit_one_operand_loc (loc, type, t1, arg0);
-   }
-
   /* Fold (X  1) ^ 1 as (X  1) == 0.  */
   if (TREE_CODE (arg0) == BIT_AND_EXPR
   INTEGRAL_TYPE_P (type)
@@ -11005,45 +10933,6 @@ fold_binary_loc (location_t loc,
return fold_build2_loc (loc, EQ_EXPR, type, arg0,
build_zero_cst (TREE_TYPE (arg0)));
 
-  /* Fold (X  Y) ^ Y as ~X  Y.  */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
-  operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-   {
- tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
- return fold_build2_loc (loc, BIT_AND_EXPR, type,
- fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
- fold_convert_loc (loc, type, arg1));
-   }
-  /* Fold (X  Y) ^ X as ~Y  X.  */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
-  operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0)
-  reorder_operands_p (TREE_OPERAND (arg0, 1), arg1))
-   {
- tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1));
- return fold_build2_loc (loc, BIT_AND_EXPR, type,
- fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
- fold_convert_loc (loc, type, arg1));
-   }
-  /* Fold X ^ (X  Y) as X  ~Y.  */
-  if (TREE_CODE (arg1) == BIT_AND_EXPR
-  operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
-   {
- tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1));
- return fold_build2_loc (loc, BIT_AND_EXPR, type,
- fold_convert_loc (loc, type, arg0),
- fold_build1_loc (loc, BIT_NOT_EXPR, type, tem));
-   }
-  /* Fold X ^ (Y  X) as ~Y  X.  */
-  if (TREE_CODE (arg1) == BIT_AND_EXPR
-  operand_equal_p (arg0, TREE_OPERAND (arg1, 1), 0)
-  reorder_operands_p (arg0, TREE_OPERAND (arg1, 0)))
-   {
- tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0));
- return fold_build2_loc (loc, BIT_AND_EXPR, type,
- fold_build1_loc (loc, BIT_NOT_EXPR, type, tem),
- fold_convert_loc (loc, type, arg0));
-   }
-
   /* See if this can be simplified into a rotate first.  If that
 is unsuccessful continue in the association code.  */
   goto bit_rotate;
Index: gcc/match.pd
===
--- gcc/match.pd(revision 225184)
+++ gcc/match.pd(working copy)
@@ -299,6 +299,11 @@ (define_operator_list swapped_tcc_compar
   (bit_xor @0 integer_all_onesp@1)
   (bit_not @0))
 
+/* ~X ^ X is -1.  */
+(simplify
+ (bit_xor:c (bit_not @0) @0)
+ { build_all_ones_cst (type); })
+
 /* x  ~0 - x  */
 (simplify
  (bit_and @0 integer_all_onesp)
@@ -429,6 +434,11 @@ (define_operator_list swapped_tcc_compar
  (bit_xor (convert? (bit_not @0)) INTEGER_CST@1)
  (bit_xor (convert @0) (bit_not @1)))
 
+/* Fold (X  Y) ^ Y as ~X  Y.  */
+(simplify
+ (bit_xor:c (bit_and:c @0 @1) @1)
+ (bit_and (bit_not @0) @1))
+
 
 (simplify
  (abs (abs@1 @0))


Re: [hsa] HSA: add support for function declaration emission and, fix RA.

2015-06-30 Thread Martin Liška
On 06/30/2015 03:28 PM, Michael Matz wrote:
 Hi,
 
 On Tue, 30 Jun 2015, Martin Liška wrote:
 
 Following patch implements emission of function declarations and removes 
 hsa_call_block_insn. The insn is replaced with a new hsa_arg_block_insn, 
 which will make insn iteration flat and much easier for register 
 allocator.
 
 Given that BRIG forces us to have a multi-level layering of instructions 
 in one way or the other, I indeed think this new representation is nicer, 
 so thanks.
 
 
 Ciao,
 Michael.
 

Yeah, I introduced the new insn, which is used just as a placeholder
to emit the necessary block arguments. Having insns in a single stream is just much
nicer.

Martin


Re: [PATCH] top-level for libvtv: use normal (not raw_cxx) target exports

2015-06-30 Thread Michael Haubenwallner

On 06/30/2015 02:54 PM, Jonathan Wakely wrote:
 On 29/06/15 18:57 +0200, Michael Haubenwallner wrote:
 Actually, an unexpected libstdc++-v3/configure change is introduced by some
 inconsistency in 
 https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=223196
 where acinclude.m4 contains different code than the recreated configure:

 acinclude.m4-3973: case ${target_os} in
 acinclude.m4#3974:   gnu* | linux* | solaris*)
 acinclude.m4-3975: GCC_TRY_COMPILE_OR_LINK(

  configure-79218:   case ${target_os} in
  configure#79219:   gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu | 
 solaris*)
  configure-79220: if test x$gcc_no_link = xyes; then

 Not sure how to handle such kind of inconsistencies though...
 
 That's just a bug, there should be no inconsistency.

Yes, of course.

Though I'm unaware of any current continuous integration setup for gcc, here is
an idea for improvement - doing something like a buildbot-based check:
Have one buildbot instance enable maintainer-mode using the predefined autotools
versions, touch all the .in files, and yell if there is some svn diff after
bootstrap.

Thanks!
/haubi/


Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
  On Tue, 30 Jun 2015, Marek Polacek wrote:
  
   On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
Does my suggestion to build the all_ones constant in TREE_TYPE (@0) and
convert that to type help for that?
   
   It appears to work, but it seems weird to me to create an integer constant
   in one type and then immediately cast it to another type.
  
  Yes.  Do you have a testcase now that fails using bools?
 
 I don't have a testcase that fails with the pattern we currently have, i.e.
 the one with tree_nop_conversion_p.

I mean with removing tree_nop_conversion_p.

Richard.


[hsa] HSA: add support for function declaration emission and, fix RA.

2015-06-30 Thread Martin Liška
Hello.

Following patch implements emission of function declarations and removes 
hsa_call_block_insn.
The insn is replaced with a new hsa_arg_block_insn, which will make insn 
iteration flat and
much easier for register allocator.

Thanks,
Martin
From abb5eb3cf4f8569d2e003aabc5f078fcd17348a6 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Mon, 22 Jun 2015 17:51:11 +0200
Subject: [PATCH 1/2] HSA: add support for function declaration emission and
 fix RA.

gcc/ChangeLog:

2015-06-23  Martin Liska  mli...@suse.cz

	* hsa-brig.c (emit_directive_variable): Remove assert that
	is no longer valid.
	(emit_function_directives): Change the function that it accepts
	a hsa_function_representation and is used both for definitions and
	declarations.
	(emit_function_declaration): New function.
	(emit_arg_block): Remove.
	(emit_arg_block_insn): New function.
	(emit_insn): Add support for newly added hsa_insn_arg_block
	insn.
	(emit_call_insn): Use offsets of called functions because
	these function declaration are already emitted.
	(hsa_brig_emit_function): Emit function declarations for all called
	functions that haven't been seen yet.
	(hsa_output_brig): Do not process pseudo-linkage.
	* hsa-dump.c (dump_hsa_insn): Change argument type.
	(dump_hsa_bb): Fix indentation for hsa_arg_block_insn.
	* hsa-gen.c (hsa_deinit_data_for_cfun): Use newly added function
	release.
	(hsa_function_representation::hsa_function_representation): Add
	construction for newly added members.
	(hsa_function_representation::~hsa_function_representation): Add
	destruction for newly added members.
	(hsa_insn_arg_block::hsa_insn_arg_block): New function.
	(hsa_insn_arg_block::operator new): New function.
	(get_symbol_for_decl): Use new function.
	(gen_hsa_insns_for_direct_call): Save all called functions.
	(gen_hsa_insns_for_known_library_call): Use newly added function
	get_declaration_name.
	(get_function_arg_count): New.
	(gen_function_decl_parameters): Likewise.
	(gen_function_def_parameters): Add an optional argument and rename
	from:
	(gen_function_parameters): Removed.
	(hsa_generate_function_declaration): New.
	(generate_hsa): Fill up declaration for a hsa_function_representation.
	(hsa_init_data_for_cfun): Replace hsa_insn_call_block with hsa_insn_arg_block
	as template argument.
	* hsa-regalloc.c (naive_process_phi): Add assert.
	(hsa_num_def_ops): Change # of operands for call insn.
	(visit_insn): Remove.
	(remove_def_in_insn): Likewise.
	(merge_live_range_for_insn): Likewise.
	(linear_scan_regalloc): Simplify.
	* hsa.c	(get_declaration_name): New function.
	(hsa_free_decl_kernel_mapping): Add guard.
	* hsa.h (struct hsa_function_representation): Add new struct fields.
	(hsa_function_representation::release): New.

Fix call_block_insn and fix RA.

Fix RA and related stuff.
---
 gcc/hsa-brig.c | 240 ++---
 gcc/hsa-dump.c |  41 -
 gcc/hsa-gen.c  | 224 -
 gcc/hsa-regalloc.c | 136 +-
 gcc/hsa.c  |  20 -
 gcc/hsa.h  |  45 ++
 6 files changed, 372 insertions(+), 334 deletions(-)

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d06fe42..ff697ee 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -128,10 +128,6 @@ struct function_linkage_pair
   unsigned int offset;
 };
 
-/* Vector of function calls where we need to resolve function offsets.  */
-
-static auto_vec function_linkage_pair function_call_linkage;
-
 /* Add a new chunk, allocate data for it and initialize it.  */
 
 void
@@ -511,7 +507,6 @@ emit_directive_variable (struct hsa_symbol *symbol)
   dirvar.type = htole16 (symbol-type);
   dirvar.segment = symbol-segment;
   dirvar.align = get_alignment (dirvar.type);
-  gcc_assert (symbol-linkage);
   dirvar.linkage = symbol-linkage;
   dirvar.dim.lo = (uint32_t) symbol-dim;
   dirvar.dim.hi = (uint32_t) ((unsigned long long) symbol-dim  32);
@@ -522,11 +517,11 @@ emit_directive_variable (struct hsa_symbol *symbol)
   return symbol-directive_offset;
 }
 
-/* Emit directives describing the function, for example its input and output
-   arguments, local variables etc.  */
+/* Emit directives describing either a function declaration or
+   definition F.  */
 
 static BrigDirectiveExecutable *
-emit_function_directives (void)
+emit_function_directives (hsa_function_representation *f)
 {
   struct BrigDirectiveExecutable fndir;
   unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
@@ -534,38 +529,43 @@ emit_function_directives (void)
   BrigDirectiveExecutable *ptr_to_fndir;
   hsa_symbol *sym;
 
-  name_offset = brig_emit_string (hsa_cfun-name, '');
+  name_offset = brig_emit_string (f-name, '');
   inarg_off = brig_code.total_size + sizeof(fndir)
-+ (hsa_cfun-output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
++ (f-output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
   scoped_off = inarg_off
-+ hsa_cfun-input_args_count * 

[hsa] Remove assumption that each translation unit must have a, kernel.

2015-06-30 Thread Martin Liška
Hello.

Attached patch is very simple and does $subject.

Thanks,
Martin
From ad7053eb82a354dce6a5460a3b3c5f1dba939094 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Thu, 25 Jun 2015 13:58:51 +0200
Subject: [PATCH 2/2] Remove assumption that each translation unit must have a
 kernel.

gcc/ChangeLog:

2015-06-25  Martin Liska  mli...@suse.cz

	* hsa-brig.c (hsa_output_kernel_mapping): Remove assert.
---
 gcc/hsa-brig.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index ff697ee..fcef710 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -1637,7 +1637,6 @@ static void
 hsa_output_kernel_mapping (tree brig_decl)
 {
   unsigned map_count = hsa_get_number_decl_kernel_mappings ();
-  gcc_assert (map_count  0);
 
   tree int_num_of_kernels;
   int_num_of_kernels = build_int_cst (integer_type_node, (int) map_count);
-- 
2.1.4



Re: [v3 PATCH] Implement N4387 and LWG 2367

2015-06-30 Thread Jonathan Wakely

On 13/06/15 21:05 +0300, Ville Voutilainen wrote:

Another round! The new patch
- makes sure not to change non-deduced parameters to deduced ones, and adds
tests for that.
- removes the name of the bool non-type template parameter for the 'concepts'.
- introduces shortcuts for making the use of the 'concepts' more readable.


These bits are lovely jubbly.


- changes library tag types into non-default-constructible ones.
Without this, we
can run into an ambiguity between the constructor template that takes
(const _T1, const _T2)
and the one that takes (allocator_arg_t, Alloc), when the call site
calls it with ({}, somethingelse).
We could avoid that by just not doing that in the library
(experimental string searchers do that),
but it's just daft to allow default-constructing a library tag type. I
have filed an LWG issue to
fix all tag types, but this patch doesn't yet touch nothrow_t and
optional's in_place_t. I expect
to submit follow-up patches for that.


I agree that default-constructing tag types doesn't make sense when we
have named constants to use. I also think it's a real pain in the
posterior that tuple<T,U>( {}, T{} ) is ambiguous, but I'm not
comfortable with making all those tag types non-default constructible
until we get an indication how LWG feel about the proposal.

So I've removed that part of the patch and fixed the searcher that
used {} and became ambiguous. I have a follow-up patch coming that
will prevent the ambiguities differently.

Tested ppc64le-linux and committed to trunk.

commit b80baac2fa84492f90e9c4137190330cf7c53fee
Author: Jonathan Wakely jwak...@redhat.com
Date:   Mon Jun 29 15:07:36 2015 +0100

2015-06-30  Ville Voutilainen  ville.voutilai...@gmail.com

	Implement N4387, Improving pair and tuple, and LWG 2367.

	* include/bits/stl_pair.h (_ConstructiblePair,
	_ImplicitlyConvertiblePair, _MoveConstructiblePair,
	_ImplicitlyMoveConvertiblePair): New.
	(pair()): Constrain it.
	(pair(const _T1, const _T2), pair(const pair_U1, _U2),
	pair(_U1, const _T2), pair(const _T1, _U2), pair(_U1, _U2),
	pair(pair_U1, _U2)): Make conditionally explicit.
	* include/std/tuple (_TC, tuple::_TC2, tuple::TCC, tuple::TMC): New.
	(tuple()): Constrain it.
	(tuple(const _UElements...), tuple(_UElements...),
	tuple(const tuple_UElements...), tuple(tuple_UElements...),
	tuple(allocator_arg_t, const _Alloc, const _UElements...),
	tuple(allocator_arg_t, const _Alloc, _UElements...),
	tuple(allocator_arg_t, const _Alloc, const tuple_UElements...),
	tuple(allocator_arg_t, const _Alloc, tuple_UElements...),
	tuple(const pair_U1, _U2), tuple(pair_U1, _U2),
	tuple(allocator_arg_t, const _Alloc, const pair_U1, _U2),
	tuple(allocator_arg_t, const _Alloc, pair_U1, _U2)): Make
	conditionally explicit.
	* include/experimental/functional (__boyer_moore_array_base): Name
	array type explicitly instead of using an empty braced-init-list.
	* testsuite/20_util/pair/cons/explicit_construct.cc: New.
	* testsuite/20_util/pair/piecewise.cc: Use piecewise_construct.
	* testsuite/20_util/pair/requirements/dr2367.cc: New.
	* testsuite/20_util/tuple/cons/explicit_construct.cc: New.
	* testsuite/20_util/tuple/requirements/dr2367.cc: New.

diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h
index 490b005..6672ecb 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -84,6 +84,38 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   templatestd::size_t...
 struct _Index_tuple;
+
+  // Concept utility functions, reused in conditionally-explicit
+  // constructors.
+  template typename _T1, typename _T2, typename _U1, typename _U2
+  constexpr bool _ConstructiblePair()
+  {
+return __and_is_constructible_T1, const _U1,
+		  is_constructible_T2, const _U2::value;
+  }
+
+  template typename _T1, typename _T2, typename _U1, typename _U2
+  constexpr bool _ImplicitlyConvertiblePair()
+  {
+return __and_is_convertibleconst _U1, _T1,
+		  is_convertibleconst _U2, _T2::value;
+  }
+
+  template typename _T1, typename _T2, typename _U1, typename _U2
+  constexpr bool _MoveConstructiblePair()
+  {
+return __and_is_constructible_T1, _U1,
+		  is_constructible_T2, _U2::value;
+  }
+
+  template typename _T1, typename _T2, typename _U1, typename _U2
+  constexpr bool _ImplicitlyMoveConvertiblePair()
+  {
+return __and_is_convertible_U1, _T1,
+		  is_convertible_U2, _T2::value;
+  }
+
+
 #endif
 
  /**
@@ -105,52 +137,136 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // 265.  std::pair::pair() effects overly restrictive
   /** The default constructor creates @c first and @c second using their
*  respective default constructors.  */
+#if __cplusplus = 201103L
+  template typename _U1 = _T1,
+typename _U2 = _T2,
+typename enable_if__and_
+ 

Re: [hsa] HSA: add support for function declaration emission and, fix RA.

2015-06-30 Thread Michael Matz
Hi,

On Tue, 30 Jun 2015, Martin Liška wrote:

 Following patch implements emission of function declarations and removes 
 hsa_call_block_insn. The insn is replaced with a new hsa_arg_block_insn, 
 which will make insn iteration flat and much easier for register 
 allocator.

Given that BRIG forces us to have a multi-level layering of instructions 
in one way or the other, I indeed think this new representation is nicer, 
so thanks.


Ciao,
Michael.

Re: [Patch SRA] Fix PR66119 by calling get_move_ratio in SRA

2015-06-30 Thread James Greenhalgh

On Fri, Jun 26, 2015 at 06:10:00PM +0100, Jakub Jelinek wrote:
 On Fri, Jun 26, 2015 at 06:03:34PM +0100, James Greenhalgh wrote:
  --- /dev/null
  +++ b/gcc/testsuite/g++.dg/pr66119.C

 I think generally testcases shouldn't be added into g++.dg/ directly,
 but subdirectories.  So g++.dg/opt/ ?

  @@ -0,0 +1,69 @@
  +/* PR66119 - MOVE_RATIO is not constant in a compiler run, so Scalar
  +   Reduction of Aggregates must ask the back-end more than once what
  +   the value of MOVE_RATIO now is.  */
  +
  +/* { dg-do compile  { target i?86-*-* x86_64-*-* } }  */

 In g++.dg/, dejagnu cycles through all 3 major -std=c* versions,
 thus using -std=c++11 is inappropriate.
 If the test requires c++11, instead you do
 // { dg-do compile { target { { i?86-*-* x86_64-*-* }  c++11 } } }

  +/* { dg-options -std=c++11 -O3 -mavx -fdump-tree-sra -march=slm { target 
  avx_runtime } } */

 and remove -std=c++11 here.  I don't see any point in guarding it with
 avx_runtime, after all, if not avx_runtime, the test will be compiled with
 -O0 and thus very likely fail the scan-tree-dump test.

 As it is dg-do compile test only, you have no dependency on assembler nor
 linker nor runtime.
 But I'd add -mtune=slm too.

Thanks, I'm used to the dance we try to do to get Neon enabled/disabled
correctly when testing multilib environments on ARM so tried to
overengineer things!

I've updated the testcase as you suggested, and moved it to g++.dg/opt.

OK?

Thanks,
James

---
gcc/

2015-06-30  James Greenhalgh  james.greenha...@arm.com

PR tree-optimization/66119
* toplev.c (process_options): Don't set up default values for
the sra_max_scalarization_size_{speed,size} parameters.
* tree-sra (analyze_all_variable_accesses): If no values
have been set for the sra_max_scalarization_size_{speed,size}
parameters, call get_move_ratio to get target defaults.

gcc/testsuite/

2015-06-30  James Greenhalgh  james.greenha...@arm.com

* g++.dg/opt/pr66119.C: New.

diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C b/gcc/testsuite/g++.dg/opt/pr66119.C
new file mode 100644
index 000..5b420c2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr66119.C
@@ -0,0 +1,69 @@
+/* PR66119 - MOVE_RATIO is not constant in a compiler run, so Scalar
+   Reduction of Aggregates must ask the back-end more than once what
+   the value of MOVE_RATIO now is.  */
+
+/* { dg-do compile  { target { { i?86-*-* x86_64-*-* }  c++11 } }  }  */
+/* { dg-options -O3 -mavx -fdump-tree-sra -march=slm -mtune=slm } */
+
+#include immintrin.h
+
+class MyAVX
+{
+  __m256d data;
+public:
+  MyAVX () = default;
+  MyAVX (const MyAVX ) = default;
+  MyAVX (__m256d _data) : data(_data) { ; }
+
+  MyAVX  operator= (const MyAVX ) = default;
+
+  operator __m256d () const { return data; }
+  MyAVX operator+ (MyAVX s2) { return data+s2.data; }
+};
+
+template typename T class AVX_trait { ; };
+
+template  class AVX_traitdouble {
+public:
+  typedef __m256d TSIMD;
+};
+
+
+template typename T
+class MyTSIMD
+{
+  typename AVX_traitT::TSIMD data;
+
+public:
+  MyTSIMD () = default;
+  MyTSIMD (const MyTSIMD ) = default;
+  // MyTSIMD (const MyTSIMD  s2) : data(s2.data) { ; }
+  MyTSIMD (typename AVX_traitT::TSIMD _data) : data(_data) { ; }
+
+  operator typename AVX_traitT::TSIMD() const { return data; }
+  MyTSIMD operator+ (MyTSIMD s2) { return data+s2.data; }
+};
+
+// using MyVec = MyAVX;
+using MyVec = MyTSIMDdouble;
+
+class Vec2
+{
+  MyVec a, b;
+public:
+  Vec2 (MyVec aa, MyVec ab) : a(aa), b(ab) { ; }
+  Vec2 operator+ (Vec2 v2) { return Vec2(a+v2.a, b+v2.b); }
+};
+
+inline __attribute__ ((__always_inline__))
+Vec2 ComputeSomething (Vec2 a, Vec2 b)
+{
+  return a+b;
+}
+
+Vec2 TestFunction (Vec2 a, Vec2 b)
+{
+  return ComputeSomething (a,b);
+}
+
+/* { dg-final { scan-tree-dump Created a replacement for b sra } } */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 573b144..d97d852 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1299,20 +1299,6 @@ process_options (void)
  so we can correctly initialize debug output.  */
   no_backend = lang_hooks.post_options (main_input_filename);
 
-  /* Set default values for parameters relation to the Scalar Reduction
- of Aggregates passes (SRA and IP-SRA).  We must do this here, rather
- than in opts.c:default_options_optimization as historically these
- tuning heuristics have been based on MOVE_RATIO, which on some
- targets requires other symbols from the backend.  */
-  maybe_set_param_value
-(PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED,
- get_move_ratio (true) * UNITS_PER_WORD,
- global_options.x_param_values, global_options_set.x_param_values);
-  maybe_set_param_value
-(PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE,
- get_move_ratio (false) * UNITS_PER_WORD,
- global_options.x_param_values, global_options_set.x_param_values);
-
   /* Some machines may reject certain combinations of options.  */
   targetm.target_option.override ();
 
diff --git 

[patch] Use template<typename> not template<class> in std::pair

2015-06-30 Thread Jonathan Wakely

An unimportant change to meet our usual convention.

Tested powerpc64le-linux, committed to trunk.
commit a4461920b9169f8d07c8827fa77f6500573ed5a2
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Jun 30 14:27:43 2015 +0100

	* include/bits/stl_pair.h: Replace class keyword with typename.

diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h
index 6672ecb..a5a7898 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -124,7 +124,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  @tparam _T1  Type of first object.
*  @tparam _T2  Type of second object.
*/
-  templateclass _T1, class _T2
+  templatetypename _T1, typename _T2
 struct pair
 {
   typedef _T1 first_type;/// @c first_type is the first bound type
@@ -170,18 +170,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   /** There is also a templated copy ctor for the @c pair class itself.  */
 #if __cplusplus  201103L
-  templateclass _U1, class _U2
+  templatetypename _U1, typename _U2
 	pair(const pair_U1, _U2 __p)
 	: first(__p.first), second(__p.second) { }
 #else
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
 	   enable_if_ConstructiblePair_T1, _T2, _U1, _U2()
   _ImplicitlyConvertiblePair_T1, _T2, _U1, _U2(),
  bool::type=true
 constexpr pair(const pair_U1, _U2 __p)
 : first(__p.first), second(__p.second) { }
 
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
enable_if_ConstructiblePair_T1, _T2, _U1, _U2()
   !_ImplicitlyConvertiblePair_T1, _T2, _U1, _U2(),
  bool::type=false
@@ -192,7 +192,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   constexpr pair(pair) = default;
 
   // DR 811.
-  templateclass _U1, typename
+  templatetypename _U1, typename
enable_if_ConstructiblePair_T2, _T2, _T2, _T2()
   _MoveConstructiblePair_T1, _T2, _U1, _T2()
   _ImplicitlyConvertiblePair_T2, _T2, _T2, _T2()
@@ -202,7 +202,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr pair(_U1 __x, const _T2 __y)
: first(std::forward_U1(__x)), second(__y) { }
 
-  templateclass _U1, typename
+  templatetypename _U1, typename
enable_if_ConstructiblePair_T2, _T2, _T2, _T2()
   _MoveConstructiblePair_T1, _T2, _U1, _T2()
   (!_ImplicitlyConvertiblePair_T2, _T2, _T2, _T2()
@@ -212,7 +212,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
explicit constexpr pair(_U1 __x, const _T2 __y)
: first(std::forward_U1(__x)), second(__y) { }
 
-  templateclass _U2, typename
+  templatetypename _U2, typename
enable_if_ConstructiblePair_T1, _T1, _T1, _T1()
   _MoveConstructiblePair_T1, _T2, _T1, _U2()
   _ImplicitlyConvertiblePair_T1, _T1, _T1, _T1()
@@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
constexpr pair(const _T1 __x, _U2 __y)
: first(__x), second(std::forward_U2(__y)) { }
 
-  templateclass _U2, typename
+  templatetypename _U2, typename
enable_if_ConstructiblePair_T1, _T1, _T1, _T1()
   _MoveConstructiblePair_T1, _T2, _T1, _U2()
   (!_ImplicitlyConvertiblePair_T1, _T1, _T1, _T1()
@@ -232,7 +232,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
explicit pair(const _T1 __x, _U2 __y)
: first(__x), second(std::forward_U2(__y)) { }
 
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
 	   enable_if_MoveConstructiblePair_T1, _T2, _U1, _U2()
   _ImplicitlyMoveConvertiblePair_T1, _T2,
 			   _U1, _U2(),
@@ -240,7 +240,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	constexpr pair(_U1 __x, _U2 __y)
 	: first(std::forward_U1(__x)), second(std::forward_U2(__y)) { }
 
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
 	   enable_if_MoveConstructiblePair_T1, _T2, _U1, _U2()
   !_ImplicitlyMoveConvertiblePair_T1, _T2,
 			_U1, _U2(),
@@ -249,7 +249,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	: first(std::forward_U1(__x)), second(std::forward_U2(__y)) { }
 
 
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
 	   enable_if_MoveConstructiblePair_T1, _T2, _U1, _U2()
   _ImplicitlyMoveConvertiblePair_T1, _T2,
 			   _U1, _U2(),
@@ -258,7 +258,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	: first(std::forward_U1(__p.first)),
 	  second(std::forward_U2(__p.second)) { }
 
-  templateclass _U1, class _U2, typename
+  templatetypename _U1, typename _U2, typename
 	   enable_if_MoveConstructiblePair_T1, _T2, _U1, 

Re: [AArch64][TLSLE][3/N] Add UNSPEC_TLSLE

2015-06-30 Thread Jiong Wang

Christophe Lyon writes:

 Hi Jiong,

 It seems to me that you committed patches 1/N, 2/N and 3/N as only 1
 commit (rev 225121).

 Am I right?

Yes, 1/2/3 are quite tiny, and were separated for easy review, but I
committed them as one patch.


 On 26 June 2015 at 16:39, Marcus Shawcroft marcus.shawcr...@gmail.com wrote:
 On 20 May 2015 at 12:21, Jiong Wang jiong.w...@arm.com wrote:

 Add new unspec name UNSPEC_TLSLE, use it for all tlsle pattern.

 ok for trunk?

 2015-05-19  Jiong Wang  jiong.w...@arm.com

 gcc/
   * config/aarch64/aarch64.md (UNSPEC_TLSLE): New enumeration.
   (tlsle): Use new unspec name.
   (tlsle_mode): Ditto.

 OK /Marcus

-- 
Regards,
Jiong



Re: [PATCH] Make muser-mode the default for LEON3

2015-06-30 Thread Eric Botcazou
 For all LEON3 (with CASA support) the CASA instruction works in
 supervisor-mode regardless of ASI used. In user-mode CASA only works
 with the user-mode ASI. So CASA with user-mode ASI works for both
 user-mode and supervisor-mode. By having user-mode ASI as default, one
 would not need to change flag when compiling user or kernel code.

Thanks, OK for mainline, 5 branch and 4.9 branch.

-- 
Eric Botcazou


[PATCH][8/n] Remove GENERIC stmt combining from SCCVN

2015-06-30 Thread Richard Biener

The following moves some bitwise patterns from the match-and-simplify
branch, extending them with proper conditional converts and removing
the corresponding patterns from fold-const.c

Bootstrap & regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2015-06-30  Richard Biener  rguent...@suse.de

* fold-const.c (fold_binary_loc): Move ~x & ~y -> ~(x | y) and
~x | ~y -> ~(x & y), (x & CST) ^ (x & CST2) -> (x & CST) | (x & CST2),
(X | Y) ^ X -> Y & ~ X, ~X ^ ~Y to X ^ Y and ~X ^ C to X ^ ~C ...
* match.pd: ... to patterns here.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 225164)
+++ gcc/fold-const.c(working copy)
@@ -10996,24 +10996,6 @@ fold_binary_loc (location_t loc,
   if (t1 != NULL_TREE)
return t1;
 
-  /* Convert (or (not arg0) (not arg1)) to (not (and (arg0) (arg1))).
-
-This results in more efficient code for machines without a NAND
-instruction.  Combine will canonicalize to the first form
-which will allow use of NAND instructions provided by the
-backend if they exist.  */
-  if (TREE_CODE (arg0) == BIT_NOT_EXPR
-  TREE_CODE (arg1) == BIT_NOT_EXPR)
-   {
- return
-   fold_build1_loc (loc, BIT_NOT_EXPR, type,
-build2 (BIT_AND_EXPR, type,
-fold_convert_loc (loc, type,
-  TREE_OPERAND (arg0, 0)),
-fold_convert_loc (loc, type,
-  TREE_OPERAND (arg1, 0;
-   }
-
   /* See if this can be simplified into a rotate first.  If that
 is unsuccessful continue in the association code.  */
   goto bit_rotate;
@@ -11037,90 +11019,6 @@ fold_binary_loc (location_t loc,
  return omit_one_operand_loc (loc, type, t1, arg0);
}
 
-  /* If we are XORing two BIT_AND_EXPR's, both of which are and'ing
- with a constant, and the two constants have no bits in common,
-we should treat this as a BIT_IOR_EXPR since this may produce more
-simplifications.  */
-  if (TREE_CODE (arg0) == BIT_AND_EXPR
-  TREE_CODE (arg1) == BIT_AND_EXPR
-  TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
-  TREE_CODE (TREE_OPERAND (arg1, 1)) == INTEGER_CST
-  wi::bit_and (TREE_OPERAND (arg0, 1),
- TREE_OPERAND (arg1, 1)) == 0)
-   {
- code = BIT_IOR_EXPR;
- goto bit_ior;
-   }
-
-  /* (X | Y) ^ X - Y  ~ X*/
-  if (TREE_CODE (arg0) == BIT_IOR_EXPR
-   operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
-{
- tree t2 = TREE_OPERAND (arg0, 1);
- t1 = fold_build1_loc (loc, BIT_NOT_EXPR, TREE_TYPE (arg1),
-   arg1);
- t1 = fold_build2_loc (loc, BIT_AND_EXPR, type,
-   fold_convert_loc (loc, type, t2),
-   fold_convert_loc (loc, type, t1));
- return t1;
-   }
-
-  /* (Y | X) ^ X - Y  ~ X*/
-  if (TREE_CODE (arg0) == BIT_IOR_EXPR
-   operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-{
- tree t2 = TREE_OPERAND (arg0, 0);
- t1 = fold_build1_loc (loc, BIT_NOT_EXPR, TREE_TYPE (arg1),
-   arg1);
- t1 = fold_build2_loc (loc, BIT_AND_EXPR, type,
-   fold_convert_loc (loc, type, t2),
-   fold_convert_loc (loc, type, t1));
- return t1;
-   }
-
-  /* X ^ (X | Y) - Y  ~ X*/
-  if (TREE_CODE (arg1) == BIT_IOR_EXPR
-   operand_equal_p (TREE_OPERAND (arg1, 0), arg0, 0))
-{
- tree t2 = TREE_OPERAND (arg1, 1);
- t1 = fold_build1_loc (loc, BIT_NOT_EXPR, TREE_TYPE (arg0),
-   arg0);
- t1 = fold_build2_loc (loc, BIT_AND_EXPR, type,
-   fold_convert_loc (loc, type, t2),
-   fold_convert_loc (loc, type, t1));
- return t1;
-   }
-
-  /* X ^ (Y | X) - Y  ~ X*/
-  if (TREE_CODE (arg1) == BIT_IOR_EXPR
-   operand_equal_p (TREE_OPERAND (arg1, 1), arg0, 0))
-{
- tree t2 = TREE_OPERAND (arg1, 0);
- t1 = fold_build1_loc (loc, BIT_NOT_EXPR, TREE_TYPE (arg0),
-   arg0);
- t1 = fold_build2_loc (loc, BIT_AND_EXPR, type,
-   fold_convert_loc (loc, type, t2),
-   fold_convert_loc (loc, type, t1));
- return t1;
-   }
-
-  /* Convert ~X ^ ~Y to X ^ Y.  */
-  if (TREE_CODE (arg0) == BIT_NOT_EXPR
-  TREE_CODE (arg1) == BIT_NOT_EXPR)
-   return fold_build2_loc (loc, code, type,
-   fold_convert_loc (loc, type,
- TREE_OPERAND (arg0, 0)),
-   

Re: [PATCH, 2/2][PR66642] Add empty loop exit block in transform_to_exit_first_loop_alt

2015-06-30 Thread Tom de Vries

On 25/06/15 09:43, Tom de Vries wrote:

Hi,

I ran into a failure with parloops for reduction loop testcase
libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c.  When we
exercise the low iteration count loop, the test-case fails.

To understand the problem, let's first look at what happens when we use
transform_to_exit_first_loop (the original one) instead of
transform_to_exit_first_loop_alt (the alternative one, which is
currently used, and causing the failure).

Before transform_to_exit_first_loop, the low iteration count loop and
the main loop share the loop exit block. After
transform_to_exit_first_loop, that's not the case anymore, the main loop
now has an exit block with a single predecessor. Subsequently,
separate_decls_in_region inserts code in the main loop exit block, which
is only triggered upon exit of the main loop.

However, transform_to_exit_first_loop_alt does not insert such an exit
block, and the code inserted by separate_decls_in_region is also active
for the low iteration count loop, which results in an incorrect
reduction result when the low iteration count loop is used.


This patch fixes the problem by making sure
transform_to_exit_first_loop_alt adds a new exit block in between the
main loop header and the old exit block.




Updated test-cases after commit of fix for PR66652, reposting.


Bootstrapped and reg-tested on x86_64.

OK for trunk?



Thanks,
- Tom

Add empty loop exit block in transform_to_exit_first_loop_alt

2015-06-24  Tom de Vries  t...@codesourcery.com

	PR tree-optimization/66642
	* tree-parloops.c (transform_to_exit_first_loop_alt): Update function
	header comment.  Rename split_edge variable to edge_at_split.  Split
	exit edge to create new loop exit bb.  Insert loop exit phis in new loop
	exit bb.

	* testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (main): Test low
	iteration count case.
	* testsuite/libgomp.c/parloops-exit-first-loop-alt.c (init): New
	function, factor out of ...
	(main): ... here.  Test low iteration count case.
---
 gcc/tree-parloops.c| 45 --
 .../libgomp.c/parloops-exit-first-loop-alt-3.c |  5 +++
 .../libgomp.c/parloops-exit-first-loop-alt.c   | 28 +-
 3 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 21ed17b..19c1aa5 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1535,7 +1535,7 @@ replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
  goto bb header
 
  bb exit:
- sum_z = PHI sum_b (cond[1])
+ sum_z = PHI sum_b (cond[1]), ...
 
  [1] Where bb cond is single_pred (bb latch); In the simplest case,
 	 that's bb header.
@@ -1562,14 +1562,17 @@ replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
  if (ivtmp_c  n + 1)
goto bb header;
  else
-   goto bb exit;
+   goto bb newexit;
 
  bb latch:
  ivtmp_b = ivtmp_a + 1;
  goto bb newheader
 
+ bb newexit:
+ sum_y = PHI sum_c (newheader)
+
  bb exit:
- sum_z = PHI sum_c (newheader)
+ sum_z = PHI sum_y (newexit), ...
 
 
In unified diff format:
@@ -1606,9 +1609,12 @@ replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
 - goto bb header
 + goto bb newheader
 
++bb newexit:
++sum_y = PHI sum_c (newheader)
+
   bb exit:
-- sum_z = PHI sum_b (cond[1])
-+ sum_z = PHI sum_c (newheader)
+- sum_z = PHI sum_b (cond[1]), ...
++ sum_z = PHI sum_y (newexit), ...
 
Note: the example does not show any virtual phis, but these are handled more
or less as reductions.
@@ -1646,7 +1652,7 @@ transform_to_exit_first_loop_alt (struct loop *loop,
 
   /* Create the new_header block.  */
   basic_block new_header = split_block_before_cond_jump (exit-src);
-  edge split_edge = single_pred_edge (new_header);
+  edge edge_at_split = single_pred_edge (new_header);
 
   /* Redirect entry edge to new_header.  */
   edge entry = loop_preheader_edge (loop);
@@ -1663,9 +1669,9 @@ transform_to_exit_first_loop_alt (struct loop *loop,
   e = redirect_edge_and_branch (post_cond_edge, header);
   gcc_assert (e == post_cond_edge);
 
-  /* Redirect split_edge to latch.  */
-  e = redirect_edge_and_branch (split_edge, latch);
-  gcc_assert (e == split_edge);
+  /* Redirect edge_at_split to latch.  */
+  e = redirect_edge_and_branch (edge_at_split, latch);
+  gcc_assert (e == edge_at_split);
 
   /* Set the new loop bound.  */
   gimple_cond_set_rhs (cond_stmt, bound);
@@ -1718,21 +1724,36 @@ transform_to_exit_first_loop_alt (struct loop *loop,
   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
   flush_pending_stmts (post_inc_edge);
 
-  /* Register the reduction exit phis.  */
+  /* Create a new empty exit block, inbetween the new loop header and the old
+ exit block.  The function separate_decls_in_region needs this block to
+ insert code that is active on loop exit, but not any other path.  */
+  basic_block 

[gomp4, committed] Insert new exit block only when needed in transform_to_exit_first_loop_alt

2015-06-30 Thread Tom de Vries

Hi,

while testing the (now committed) fix for PR66642 (Add empty loop exit 
block in transform_to_exit_first_loop_alt) on the gomp-4_0-branch I ran 
into an ICE for the kernels-reduction.c testcase.


For oacc kernels, we don't generate a low iteration count loop, so the 
exit block of the loop already may have a single predecessor, so there's 
no need to add an empty loop exit block.


This patch conditionalizes the fix for PR66642 and fixes the ICE.

Bootstrapped and reg-tested on x86_64 on top of gomp-4_0-branch, and 
committed.


Thanks,
- Tom
Insert new exit block only when needed in transform_to_exit_first_loop_alt

2015-06-30  Tom de Vries  t...@codesourcery.com

	* tree-parloops.c (transform_to_exit_first_loop_alt): Insert new exit
	block only when needed.
---
 gcc/tree-parloops.c | 42 --
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 9c04308..04708c0 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1734,10 +1734,15 @@ transform_to_exit_first_loop_alt (struct loop *loop,
   /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
   flush_pending_stmts (post_inc_edge);
 
-  /* Create a new empty exit block, inbetween the new loop header and the old
- exit block.  The function separate_decls_in_region needs this block to
- insert code that is active on loop exit, but not any other path.  */
-  basic_block new_exit_block = split_edge (exit);
+
+  basic_block new_exit_block = NULL;
+  if (!single_pred_p (exit-dest))
+{
+  /* Create a new empty exit block, inbetween the new loop header and the
+	 old exit block.  The function separate_decls_in_region needs this block
+	 to insert code that is active on loop exit, but not any other path.  */
+  new_exit_block = split_edge (exit);
+}
 
   /* Insert and register the reduction exit phis.  */
   for (gphi_iterator gsi = gsi_start_phis (exit_block);
@@ -1745,17 +1750,24 @@ transform_to_exit_first_loop_alt (struct loop *loop,
gsi_next (gsi))
 {
   gphi *phi = gsi.phi ();
+  gphi *nphi = NULL;
   tree res_z = PHI_RESULT (phi);
+  tree res_c;
 
-  /* Now that we have a new exit block, duplicate the phi of the old exit
-	 block in the new exit block to preserve loop-closed ssa.  */
-  edge succ_new_exit_block = single_succ_edge (new_exit_block);
-  edge pred_new_exit_block = single_pred_edge (new_exit_block);
-  tree res_y = copy_ssa_name (res_z, phi);
-  gphi *nphi = create_phi_node (res_y, new_exit_block);
-  tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
-  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
-  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
+  if (new_exit_block != NULL)
+	{
+	  /* Now that we have a new exit block, duplicate the phi of the old
+	 exit block in the new exit block to preserve loop-closed ssa.  */
+	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
+	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
+	  tree res_y = copy_ssa_name (res_z, phi);
+	  nphi = create_phi_node (res_y, new_exit_block);
+	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
+	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
+	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
+	}
+  else
+	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
 
   if (virtual_operand_p (res_z))
 	continue;
@@ -1763,7 +1775,9 @@ transform_to_exit_first_loop_alt (struct loop *loop,
   gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
   struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
   if (red != NULL)
-	red-keep_res = nphi;
+	red-keep_res = (nphi != NULL
+			 ? nphi
+			 : phi);
 }
 
   /* We're going to cancel the loop at the end of gen_parallel_loop, but until
-- 
1.9.1



Re: [PATCH][AArch64][PR 66136] rewrite geniterators.sh in awk

2015-06-30 Thread Szabolcs Nagy


On 02/06/15 18:01, Szabolcs Nagy wrote:
 On 01/06/15 13:55, Marcus Shawcroft wrote:
 On 18 May 2015 at 15:57, Szabolcs Nagy szabolcs.n...@arm.com wrote:
 Rewrote the generator script in awk, to avoid dealing with
 sed portability issues.

 gcc/Changelog:

 2015-05-18  Szabolcs Nagy  szabolcs.n...@arm.com

 PR target/66136
 * config/aarch64/geniterators.sh: Rewrite in awk.

 OK provided you have checked the generated output is identical before
 and after this patch.  Ask for an account on sourceware here
 https://sourceware.org/cgi-bin/pdw/ps_form.cgi you want write on
 approval for gcc.

 /Marcus

 
 committed in r224031.
 

backported to gcc-5-branch in r225170.

as requested in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66136



RE: [Patch,tree-optimization]: Add new path Splitting pass on tree ssa representation

2015-06-30 Thread Bernhard Reutner-Fischer
On June 30, 2015 12:38:13 PM GMT+02:00, Ajit Kumar Agarwal 
ajit.kumar.agar...@xilinx.com wrote:

6. Added 2 tests.
a) compilation check  tests.
   b) execution tests.

The 2 tests seem to be identical, so why do you have both?
Also, please remove cleanup-tree-dump, this is now done
automatically.

The testcase path-split-1.c  is to check for execution which is present
in gcc.dg top directory . The one
present in the gcc.dg/tree-ssa/path-split-2.c is to check the
compilation as the action item is compilation. For the
execution tests path-split-1.c the action is compile and run.

One is a superset of the other, no?
Doesn't make sense to me, fwiw.



Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Marek Polacek
On Tue, Jun 30, 2015 at 03:13:14PM +0200, Richard Biener wrote:
 On Tue, 30 Jun 2015, Marek Polacek wrote:
 
  On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
   On Tue, 30 Jun 2015, Marek Polacek wrote:
   
On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
 Does my suggestion to build the all_ones constant in TREE_TYPE (@0) 
 and
 convert that to type help for that?

It appears to work, but it seems weird to me to create a integer 
constant
in one type and then immediately cast it to another type.
   
   Yes.  Do you have a testcase now that fails using bools?
  
  I don't have a testcase that fails with the pattern we currently have, i.e.
  the one with tree_nop_conversion_p.
 
 I mean with removing tree_nop_conversion_p.

Aha.  With tree_nop_conversion_p removed, gcc.dg/binop-notor2.c fails,
because there we optimize the return statement to return -1 instead
of return 1.
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02179.html

Marek


Re: [Patch, fortran] PR52846 - [F2008] Support submodules

2015-06-30 Thread FX
Hi Paul,

I don’t feel confident enough in many parts of the code (including the module 
part) to formally review it, but from what I’ve read it seemed rather logical 
and well-commented. If it regtests fine, I think your plan (especially at the 
current GCC stage) of committing this week is sound.

One question I had is: does this change the .mod file format in any case? I 
don’t think so, because you don’t seem to bump the version number, but have you 
checked on specific cases (like, the mega cp2k example) that the patch indeed 
does not change existing module files (the ones that do not use submodules)?

Cheers, and thanks for this patch!

FX



Re: [PATCH] Move ~X | X - -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 03:13:14PM +0200, Richard Biener wrote:
  On Tue, 30 Jun 2015, Marek Polacek wrote:
  
   On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
  Does my suggestion to build the all_ones constant in TREE_TYPE 
  (@0) and
  convert that to type help for that?
 
 It appears to work, but it seems weird to me to create a integer 
 constant
 in one type and then immediately cast it to another type.

Yes.  Do you have a testcase now that fails using bools?
   
   I don't have a testcase that fails with the pattern we currently have, 
   i.e.
   the one with tree_nop_conversion_p.
  
  I mean with removing tree_nop_conversion_p.
 
 Aha.  With tree_nop_conversion_p removed, gcc.dg/binop-notor2.c fails,
 because there we optimize the return statement to return -1 instead
 of return 1.
 https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02179.html

Hmm ok.  That testcase is basically

int foo (_Bool a)
{
  return ((int) a) | ((int) ~a);
}

where indeed with unsigned bool (yeah, our bool is unsigned) we
get zero-extension on both arms.  Similar issue would show up with

int foo (unsigned char a)
{
  return ((int) a) | ((int) ~a);
}

so it's not specific to bools.  So yes, the suggestion to
do

  (convert { build_all_ones_cst (TREE_TYPE (@0)); })

would work here.

Richard.

-- 
Richard Biener rguent...@suse.de
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
Norton, HRB 21284 (AG Nuernberg)


Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Manuel López-Ibáñez

On 30/06/15 15:18, Dhole wrote:

A solution for toolchain packages that embed timestamps during the build
process has been proposed for anyone interested and it consists of the
following:


Perhaps this has been discussed and discarded before (if so I would appreciate 
if you could point me to the relevant discussion), why not simply redefine 
__DATE__ and __TIME__ to an appropriate string via the command-line or a dummy 
include?


That probably triggers some warnings (or it may not be supported at all, I 
haven't tried myself), but fixing those issues leads to a more general solution 
than GCC reacting to an arbitrary variable name and changing its behaviour 
quite silently.


Cheers,

Manuel.


[committed] Run testsuite/libgomp.oacc-c++/c++.exp at -O2 by default

2015-06-30 Thread Tom de Vries

[ was: Re: [patch] Run testsuite/libgomp.c++/c++.exp at -O2 by default ]

On 30/06/15 14:07, Jakub Jelinek wrote:

On Mon, Jun 15, 2015 at 06:36:17PM +0200, Tom de Vries wrote:

Hi,

this patch:
- sets DEFAULT_CFLAGS to -O2, if not set otherwise
   (similar to what is done in c.exp)
- removes superfluous dg-options -O2 settings.
- removes superfluous dg-options -fopenmp settings.
- uses dg-additional-options for -std=standard settings

Tested on x86_64.

OK for trunk?


Ok, thanks.



I've also committed this patch, as obvious. It does the same, but for 
libgomp.oacc-c++/c++.exp (which was based on libgomp.c++/c++.exp).


Thanks,
- Tom


Run testsuite/libgomp.oacc-c++/c++.exp at -O2 by default

2015-06-30  Tom de Vries  t...@codesourcery.com

	* testsuite/libgomp.oacc-c++/c++.exp: Set DEFAULT_CFLAGS to -O2 if not
	already set.  Use DEFAULT_CFLAGS in dg-runtest.
	* testsuite/libgomp.oacc-c-c++-common/collapse-3.c: Remove dg-options
	-O2.
---
 libgomp/testsuite/libgomp.oacc-c++/c++.exp   | 7 ++-
 libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c | 1 -
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
index f486f9b..80d1359 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/c++.exp
+++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -13,6 +13,11 @@ if [info exists lang_include_flags] then {
 unset lang_include_flags
 }
 
+# If a testcase doesn't have special options, use these.
+if ![info exists DEFAULT_CFLAGS] then {
+set DEFAULT_CFLAGS -O2
+}
+
 # Initialize dg.
 dg-init
 
@@ -96,7 +101,7 @@ if { $lang_test_file_found } {
 
 	setenv ACC_DEVICE_TYPE $offload_target_openacc
 
-	dg-runtest $tests $tagopt $libstdcxx_includes
+	dg-runtest $tests $tagopt $libstdcxx_includes $DEFAULT_CFLAGS
 }
 }
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c
index a5be728..6800428 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c
@@ -1,5 +1,4 @@
 /* { dg-do run } */
-/* { dg-options -O2 } */
 
 #include string.h
 #include stdlib.h
-- 
1.9.1



[AArch64/testsuite] Restrict pic-small.c by a new directive check_effective_target_aarch64_small_fpic

2015-06-30 Thread Jiong Wang

As discussed here

  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02151.html,

Since we have enabled binutils feature detection when configuring gcc,
-fpic will not be enabled if there is no binutils support for those new
relocation modifiers.  Thus we should update the testcase pic-small.c to
only run if check_effective_target_aarch64_small_fpic is true.

On AArch64, if -fpic is not supported, then we will fall back to -fPIC
silently.  So, we can't rely on the existing check_effective_target_fpic, as
it assumes the compiler will give a warning if -fpic is not supported.  Here we
check whether binutils supports those new -fpic relocation modifiers, and
assume -fpic is supported if there is binutils support, as gcc
configuration will enable -fpic for AArch64 in such a case.  This does not
cover a test system with new binutils and an old gcc, which is rare
and which this patch does not handle.

OK for trunk?

Thanks.

2015-06-30  Jiong Wang  jiong.w...@arm.com

gcc/
  * doc/sourcebuild.texi (AArch64-specific attributes): New subsection.
  Document aarch64_small_fpic.

gcc/testsuite/
  * lib/target-supports.exp (check_effective_target_aarch64_small_fpic): New 
function.
  * gcc.target/aarch64/pic-small.c: Restrict this test by
  check_effective_target_aarch64_small_fpic.
  
-- 
Regards,
Jiong

diff --git a/gcc/testsuite/gcc.target/aarch64/pic-small.c b/gcc/testsuite/gcc.target/aarch64/pic-small.c
index 874f81b..282e4d0 100644
--- a/gcc/testsuite/gcc.target/aarch64/pic-small.c
+++ b/gcc/testsuite/gcc.target/aarch64/pic-small.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target aarch64_small_fpic } */
 /* { dg-options -O2 -fpic -fno-inline --save-temps } */
 
 void abort ();
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index f0c209f..4c743c3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -937,6 +937,26 @@ proc check_effective_target_fpic { } {
 return 0
 }
 
+# On AArch64, if -fpic is not supported, then we will fall back to -fPIC
+# silently.  So, we can't rely on above check_effective_target_fpic as it
+# assume compiler will give warning if -fpic not supported.  Here we check
+# whether binutils support those new -fpic relocation modifiers, and assume
+# -fpic is supported is there is binutils support, as gcc configuration will
+# enable -fpic for AArch64 on such case.
+#
+# check_effective_target_aarch64_small_fpic is dedicated for checking small
+# memory model -fpic relocation types.
+
+proc check_effective_target_aarch64_small_fpic { } {
+if { [istarget aarch64*-*-*] } {
+	return [check_no_compiler_messages aarch64_small_fpic object {
+	void foo (void) { asm (ldr	x0, [x2, #:gotpage_lo15:globalsym]); }
+	}]
+} else {
+	return 0
+}
+}
+
 # Return 1 if -shared is supported, as in no warnings or errors
 # emitted, 0 otherwise.
 
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 13f63d1..a0bab65 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1580,6 +1580,15 @@ ARM target prefers @code{LDRD} and @code{STRD} instructions over
 
 @end table
 
+@subsubsection AArch64-specific attributes
+
+@table @code
+@item aarch64_small_fpic
+Binutils installed on test system support relocation types required by -fpic
+for AArch64 small memory model.
+
+@end table
+
 @subsubsection MIPS-specific attributes
 
 @table @code


Re: [patch] PR debug/66653: avoid late_global_decl on decl_type_context()s

2015-06-30 Thread Jason Merrill

On 06/29/2015 06:32 PM, Jason Merrill wrote:

On 06/29/2015 05:07 AM, Richard Biener wrote:

On Fri, Jun 26, 2015 at 11:59 PM, Jason Merrill ja...@redhat.com wrote:

On 06/26/2015 05:37 AM, Richard Biener wrote:


Can we defer TLS model setting to template instantiation?


We need to represent somehow that __thread (or thread_local) was used in the
declaration, but DECL_THREAD_LOCAL_P was changed to refer to the TLS model.


Ok, so easiest would be to allocate a bit from decl_with_vis for this...


Or I can find a flag in the front end.  I guess I'll do that.


Thus.

Jason

commit fcc92fd874243b1366dcbd75ff32cc8862d8ec52
Author: Jason Merrill ja...@redhat.com
Date:   Mon Jun 29 15:30:35 2015 -0400

	PR debug/66653
	* cp-tree.h (CP_DECL_THREAD_LOCAL_P): New.
	(DECL_GNU_TLS_P): Use DECL_LANG_SPECIFIC field.
	(SET_DECL_GNU_TLS_P): New.
	* call.c (make_temporary_var_for_ref_to_temp): Use
	CP_DECL_THREAD_LOCAL_P.
	(set_up_extended_ref_temp): Likewise.
	* decl.c (duplicate_decls, expand_static_init): Likewise.
	(redeclaration_error_message, grokvardecl): Likewise.
	(start_decl, register_dtor_fn, grokdeclarator): Likewise.
	* decl2.c (get_guard, var_needs_tls_wrapper): Likewise.
	(handle_tls_init): Likewise.
	* pt.c (tsubst_decl, tsubst_copy_and_build): Likewise.
	* semantics.c (finish_id_expression): Likewise.
	(handle_omp_array_sections_1, finish_omp_clauses): Likewise.
	(finish_omp_threadprivate): Likewise.
	* tree.c (decl_storage_duration): Likewise.
	* cp-gimplify.c (omp_var_to_track): Likewise.
	(cp_genericize_r): Check that it matches DECL_THREAD_LOCAL_P.
	* lex.c (retrofit_lang_decl): Return if DECL_LANG_SPECIFIC is
	already set.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index b846919..44346bf 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -9556,13 +9556,14 @@ make_temporary_var_for_ref_to_temp (tree decl, tree type)
 
   /* Register the variable.  */
   if (VAR_P (decl)
-   (TREE_STATIC (decl) || DECL_THREAD_LOCAL_P (decl)))
+   (TREE_STATIC (decl) || CP_DECL_THREAD_LOCAL_P (decl)))
 {
   /* Namespace-scope or local static; give it a mangled name.  */
   /* FIXME share comdat with decl?  */
   tree name;
 
   TREE_STATIC (var) = TREE_STATIC (decl);
+  CP_DECL_THREAD_LOCAL_P (var) = CP_DECL_THREAD_LOCAL_P (decl);
   set_decl_tls_model (var, DECL_TLS_MODEL (decl));
   name = mangle_ref_init_variable (decl);
   DECL_NAME (var) = name;
@@ -9683,7 +9684,7 @@ set_up_extended_ref_temp (tree decl, tree expr, vectree, va_gc **cleanups,
   rest_of_decl_compilation (var, /*toplev=*/1, at_eof);
   if (TYPE_HAS_NONTRIVIAL_DESTRUCTOR (type))
 	{
-	  if (DECL_THREAD_LOCAL_P (var))
+	  if (CP_DECL_THREAD_LOCAL_P (var))
 	tls_aggregates = tree_cons (NULL_TREE, var,
 	tls_aggregates);
 	  else
diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c
index 1a627db..b95489e 100644
--- a/gcc/cp/cp-gimplify.c
+++ b/gcc/cp/cp-gimplify.c
@@ -831,7 +831,7 @@ omp_var_to_track (tree decl)
 type = TREE_TYPE (type);
   if (type == error_mark_node || !CLASS_TYPE_P (type))
 return false;
-  if (VAR_P (decl)  DECL_THREAD_LOCAL_P (decl))
+  if (VAR_P (decl)  CP_DECL_THREAD_LOCAL_P (decl))
 return false;
   if (cxx_omp_predetermined_sharing (decl) != OMP_CLAUSE_DEFAULT_UNSPECIFIED)
 return false;
@@ -1157,6 +1157,12 @@ cp_genericize_r (tree *stmt_p, int *walk_subtrees, void *data)
   *stmt_p = build1 (NOP_EXPR, void_type_node, integer_zero_node);
   *walk_subtrees = 0;
 }
+  else if (TREE_CODE (stmt) == DECL_EXPR)
+{
+  tree d = DECL_EXPR_DECL (stmt);
+  if (TREE_CODE (d) == VAR_DECL)
+	gcc_assert (CP_DECL_THREAD_LOCAL_P (d) == DECL_THREAD_LOCAL_P (d));
+}
   else if (TREE_CODE (stmt) == OMP_PARALLEL || TREE_CODE (stmt) == OMP_TASK)
 {
   struct cp_genericize_omp_taskreg omp_ctx;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index e8cc38f..18cf87e 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -51,7 +51,7 @@ c-common.h, not after.
   AGGR_INIT_VIA_CTOR_P (in AGGR_INIT_EXPR)
   PTRMEM_OK_P (in ADDR_EXPR, OFFSET_REF, SCOPE_REF)
   PAREN_STRING_LITERAL (in STRING_CST)
-  DECL_GNU_TLS_P (in VAR_DECL)
+  CP_DECL_THREAD_LOCAL_P (in VAR_DECL)
   KOENIG_LOOKUP_P (in CALL_EXPR)
   STATEMENT_LIST_NO_SCOPE (in STATEMENT_LIST).
   EXPR_STMT_STMT_EXPR_RESULT (in EXPR_STMT)
@@ -2017,7 +2017,7 @@ struct GTY(()) lang_decl_base {
   unsigned repo_available_p : 1;	   /* var or fn */
   unsigned threadprivate_or_deleted_p : 1; /* var or fn */
   unsigned anticipated_p : 1;		   /* fn, type or template */
-  unsigned friend_attr : 1;		   /* fn, type or template */
+  unsigned friend_or_tls : 1;		   /* var, fn, type or template */
   unsigned template_conv_p : 1;		   /* var or template */
   unsigned odr_used : 1;		   /* var or fn */
   unsigned u2sel : 1;
@@ -2438,7 +2438,16 @@ struct GTY(()) lang_decl {
and 

[patch] Show errors when generating libstdc++ API PDF

2015-06-30 Thread Jonathan Wakely

This makes it a bit easier to determine why 'make doc-pdf-doxygen'
failed, for the common case where it's a missing TeX package.

Tested x86_64-linux, committed to trunk.
commit 432831cb555b79d686f3c8bdd115d5386d9d6301
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Jun 30 13:48:19 2015 +0100

	* doc/Makefile.am (stamp-pdf-doxygen): Grep for LaTeX errors in log.
	* doc/Makefile.in: Regenerate.

diff --git a/libstdc++-v3/doc/Makefile.am b/libstdc++-v3/doc/Makefile.am
index d6ea2d1..d63cf17 100644
--- a/libstdc++-v3/doc/Makefile.am
+++ b/libstdc++-v3/doc/Makefile.am
@@ -257,6 +257,7 @@ stamp-latex-doxygen: ${doxygen_outdir}/latex
 # include asking a wizard to enlarge capacity. If this is the case,
 # find texmf.cnf and add a zero for pool_size, string_vacancies,
 # max_strings, and pool_free values.
+# Errors like File `foo.sty' not found mean a TeX package is missing.
 stamp-pdf-doxygen: stamp-latex-doxygen ${doxygen_outdir}/pdf
 	-(cd ${doxygen_outdir}/latex  $(MAKE) -i pdf;)
 	echo Generating doxygen pdf file...;
@@ -265,6 +266,7 @@ stamp-pdf-doxygen: stamp-latex-doxygen ${doxygen_outdir}/pdf
 	  echo :: PDF file is ${api_pdf}; \
 	else \
 	  echo ... error; \
+	  grep -F 'LaTeX Error' ${doxygen_outdir}/latex/refman.log; \
 	  exit 12; \
 	fi
 	$(STAMP) stamp-pdf-doxygen


Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Manuel López-Ibáñez

On 30/06/15 16:43, Manuel López-Ibáñez wrote:

On 30/06/15 15:18, Dhole wrote:

A solution for toolchain packages that embed timestamps during the build
process has been proposed for anyone interested and it consists of the
following:


Perhaps this has been discussed and discarded before (if so I would appreciate
if you could point me to the relevant discussion), why not simply redefine
__DATE__ and __TIME__ to an appropriate string via the command-line or a dummy
include?

That probably triggers some warnings (or it may not be supported at all, I
haven't tried myself), but fixing those issues leads to a more general solution
than GCC reacting to an arbitrary variable name and changing its behaviour
quite silently.


In any case, you should be aware of point 10 here: 
https://gcc.gnu.org/wiki/Community (You only need to convince the 
decision-makers). I'm not one of them ;)


Cheers,

Manuel.



Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Dhole
On 06/30/2015 04:48 PM, Manuel López-Ibáñez wrote:
 On 30/06/15 16:43, Manuel López-Ibáñez wrote:
 Perhaps this has been discussed and discarded before (if so I would
 appreciate
 if you could point me to the relevant discussion), why not simply
 redefine
 __DATE__ and __TIME__ to an appropriate string via the command-line or
 a dummy
 include?

I'm not aware of any previous discussion on the subject, but I'm also
interested in reading it in case it exists :)

In the debian reproducible builds project we have considered several
options to address this issue. We considered redefining the __DATE__ and
__TIME__ defines by command line flags passed to gcc, but as you say,
that triggers warnings, which could become errors when building with
-Werror and thus may require manual intervention on many packages.

We are trying to find a solution that makes as many packages as possible
build reproducibly while minimizing the number of package-specific patches,
and we believe such a solution will benefit other projects working on
reproducible builds as well.

We propose to extend the env variable SOURCE_DATE_EPOCH to anyone
interested for this purpose. For instance, this feature has been
implemented upstream in help2man (1.47.1) [1], quoting the latest
changelog entry:

  * Add support for reproducible builds by using $SOURCE_DATE_EPOCH as
the date for the generated pages (closes: #787444).


 That probably triggers some warnings (or it may not be supported at
 all, I
 haven't tried myself), but fixing those issues leads to a more general
 solution
 than GCC reacting to an arbitrary variable name and changing its
 behaviour
 quite silently.

In case GCC silently changing its behavior is a concern,
we also discussed the possibility of enabling this feature with a flag
such as `--use-date-from-env`. Again, we are open to comments and other
ideas about this approach :)

 In any case, you should be aware of point 10 here:
 https://gcc.gnu.org/wiki/Community (You only need to convince the
 decision-makers). I'm not one of them ;)

Thanks for the tip!

[1] https://www.gnu.org/software/help2man/

Best regards,
Dhole



signature.asc
Description: OpenPGP digital signature


Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2015-06-30 Thread Ilya Verbin
On Tue, Jun 30, 2015 at 14:57:02 +0200, Jakub Jelinek wrote:
 On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
  --- a/libgomp/target.c
  +++ b/libgomp/target.c
  @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
  do_copyfrom)
 bool do_unmap = false;
 if (k-refcount  1)
  k-refcount--;
  -  else if (k-async_refcount  0)
  -   k-async_refcount--;
  -  else
  -   do_unmap = true;
  +  else if (k-refcount == 1)
  +   {
  + if (k-async_refcount  0)
  +   k-async_refcount--;
  + else
  +   {
  + k-refcount--;
  + do_unmap = true;
  +   }
  +   }
 
 What is the rationale of this hunk change?

Without this change, when k->refcount == 1, do_unmap is true, but refcount is
not decremented.  So, if gomp_unmap_vars is called multiple times (which is now
possible for 4.1), refcount will remain 1, and it will try to unmap k on each
subsequent call, which is wrong.  That's why I decrement refcount to zero, and
do nothing when gomp_unmap_vars is hit again with k->refcount == 0.

 BTW, we'll likely need to treat also refcount == INT_MAX as special (never
 decrease it), because I believe declare target vars are supposed to have
 refcount of infinity rather than just 2GB-1.

I'll add special refcount for declare target vars.

  @@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t 
  mapnum, void **hostaddrs,
   }
   
 if (is_enter_data)
  -{
  -  /* TODO  */
  -}
  +gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 
  false);
 
 This will leak the return value.  Either we need to arrange not to allocate
 it for enter data, or we need to assign it to some variable and free
 immediately (we don't want to perform the release operations for it).

But we can neither skip allocating it nor free it immediately, since it's used
later through splay_tree_key_s::tgt, e.g. here:

  if (is_target)
{
  for (i = 0; i  mapnum; i++)
{
  if (tgt-list[i].key == NULL)
cur_node.tgt_offset = (uintptr_t) NULL;
  else
cur_node.tgt_offset = tgt-list[i].key-tgt-tgt_start
  + tgt-list[i].key-tgt_offset;

My plan was to free tgt here:

+   if (k-refcount == 0)
+ {
+   splay_tree_remove (devicep-mem_map, k);
+   if (k-tgt-refcount  1)
+ k-tgt-refcount--;
+   else
+ gomp_unmap_tgt (k-tgt);
+ }

But now I understand that this will work only for simple cases like:

  #pragma omp target enter data ...
  ...
  #pragma omp target exit data ...

And will leak e.g. in:

  #pragma omp target data ...
{
  #pragma omp target enter data ...
}

 else
  -{
  -  /* TODO  */
  -}
  +for (i = 0; i  mapnum; i++)
  +  {
  +   struct splay_tree_key_s cur_node;
  +   unsigned char kind = kinds[i]  typemask;
  +   switch (kind)
  + {
  + case GOMP_MAP_FROM:
  + case GOMP_MAP_ALWAYS_FROM:
  + case GOMP_MAP_DELETE:
  + case GOMP_MAP_RELEASE:
  +   cur_node.host_start = (uintptr_t) hostaddrs[i];
  +   cur_node.host_end = cur_node.host_start + sizes[i];
  +   gomp_mutex_lock (devicep-lock);
 
 I don't really like locking the mutex for each map clause in exit data
 separately.  Perhaps just add a gomp_exit_data function similar to
 gomp_map_vars that will run this loop and be surrounded by the locking,
 or do it inline, but with the lock/unlock around the whole loop.
 exit data construct must have at least one map clause, so it doesn't make
 sense not to lock immediately.

I'll move locks outside of the loop.

  +   splay_tree_key k = splay_tree_lookup (devicep-mem_map, cur_node);
  +   if (!k)
  + {
  +   gomp_mutex_unlock (devicep-lock);
  +   continue;
  + }
  +
  +   if (k-refcount  0)
  + k-refcount--;
  +   if (kind == GOMP_MAP_DELETE)
  + k-refcount = 0;
 
 See above, I believe delete should not delete refcount == INT_MAX
 mappings.

Will do that.

  -- Ilya


Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Marek Polacek
On Tue, Jun 30, 2015 at 03:59:23PM +0200, Richard Biener wrote:
 On Tue, 30 Jun 2015, Marek Polacek wrote:
 
  On Tue, Jun 30, 2015 at 03:13:14PM +0200, Richard Biener wrote:
   On Tue, 30 Jun 2015, Marek Polacek wrote:
   
On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
 On Tue, 30 Jun 2015, Marek Polacek wrote:
 
  On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
   Does my suggestion to build the all_ones constant in TREE_TYPE 
   (@0) and
   convert that to type help for that?
  
  It appears to work, but it seems weird to me to create a integer 
  constant
  in one type and then immediately cast it to another type.
 
 Yes.  Do you have a testcase now that fails using bools?

I don't have a testcase that fails with the pattern we currently have, 
i.e.
the one with tree_nop_conversion_p.
   
   I mean with removing tree_nop_conversion_p.
  
  Aha.  With tree_nop_conversion_p removed, gcc.dg/binop-notor2.c fails,
  because there we optimize the return statement to return -1 instead
  of return 1.
  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02179.html
 
 Hmm ok.  That testcase is basically
 
 int foo (_Bool a)
 {
   return ((int) a) | ((int) ~a);
 }
 
 where indeed with unsigned bool (yeah, our bool is unsigned) we
 get zero-extension on both arms.  Similar issue would show up with
 
 int foo (unsigned char a)
 {
   return ((int) a) | ((int) ~a);
 }
 
 so it's not specific to bools.  So yes, the suggestion to
 do
 
   (convert { build_all_ones_cst (TREE_TYPE (@0)); })
 
 would work here.

Ok, so do you want me to change that pattern to use this
(convert { build_all_ones_cst (TREE_TYPE (@0)); })
(along with a new test containing those two functions you mentioned)?

If so, is such a patch preapproved provided it passes the usual testing?

Marek


Re: [PATCH] Move ~X | X -> -1 folding

2015-06-30 Thread Richard Biener
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 03:59:23PM +0200, Richard Biener wrote:
  On Tue, 30 Jun 2015, Marek Polacek wrote:
  
   On Tue, Jun 30, 2015 at 03:13:14PM +0200, Richard Biener wrote:
On Tue, 30 Jun 2015, Marek Polacek wrote:

 On Tue, Jun 30, 2015 at 02:47:49PM +0200, Richard Biener wrote:
  On Tue, 30 Jun 2015, Marek Polacek wrote:
  
   On Tue, Jun 30, 2015 at 01:39:29PM +0200, Marc Glisse wrote:
Does my suggestion to build the all_ones constant in TREE_TYPE 
(@0) and
convert that to type help for that?
   
   It appears to work, but it seems weird to me to create a integer 
   constant
   in one type and then immediately cast it to another type.
  
  Yes.  Do you have a testcase now that fails using bools?
 
 I don't have a testcase that fails with the pattern we currently 
 have, i.e.
 the one with tree_nop_conversion_p.

I mean with removing tree_nop_conversion_p.
   
   Aha.  With tree_nop_conversion_p removed, gcc.dg/binop-notor2.c fails,
   because there we optimize the return statement to return -1 instead
   of return 1.
   https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02179.html
  
  Hmm ok.  That testcase is basically
  
  int foo (_Bool a)
  {
return ((int) a) | ((int) ~a);
  }
  
  where indeed with unsigned bool (yeah, our bool is unsigned) we
  get zero-extension on both arms.  Similar issue would show up with
  
  int foo (unsigned char a)
  {
return ((int) a) | ((int) ~a);
  }
  
  so it's not specific to bools.  So yes, the suggestion to
  do
  
(convert { build_all_ones_cst (TREE_TYPE (@0)); })
  
  would work here.
 
 Ok, so do you want me to change that pattern to use this
 (convert { build_all_ones_cst (TREE_TYPE (@0)); })
 (along with a new test containing those two functions you mentioned)?
 
 If so, is such a patch preapproved provided it passes the usual testing?

Yes.

Thanks,
Richard.


[patch] Constrain allocator_arg_t to only work with valid Allocators

2015-06-30 Thread Jonathan Wakely

This is what I have been experimenting with as an alternative to
making allocator_arg_t non-DefaultConstructible.

By replacing allocator_arg_t parameters with __alloc_arg_t<_Alloc> we
can constrain constructors to only participate in overload resolution
when _Alloc quacks sufficiently like an allocator.

This ensures that tuple<T,U>{ {}, U{} } is unambiguous (unless U
happens to be an allocator, or a reasonable forgery of one).

Making this change required fixing some tests where I was passing
fake allocator arguments to tuples, which violated the preconditions
before but was accepted. Rejecting those precondition violations at
compile-time suggests to me that this is a nice improvement. The
downside is that it probably hurts compilation times even more, as
every change related to allocators seems to.

Any comments or suggestions about this approach?

I'm also playing with another change to make allocator_traits<A>
SFINAE-friendly, by only defining the nested allocator_type member
when __is_allocator<A> is true. If it works I think that might be
worth standardising.
commit ea134c1ada93bbeca9f09a06513704639a752d39
Author: Jonathan Wakely jwak...@redhat.com
Date:   Mon Jun 29 17:08:12 2015 +0100

	* include/bits/uses_allocator.h (__uses_alloc_t, __use_alloc): Add
	comments.
	(__is_allocator): New trait.
	(__alloc_arg_t): New SFINAE helper for detecting allocators.
	* include/std/tuple (tuple): Replace allocator_arg_t with
	__alloc_arg_t_Alloc.
	* testsuite/20_util/tuple/cons/allocators.cc: Use a valid Allocator.
	* testsuite/20_util/uses_allocator/cons_neg.cc: Likewise.
	* testsuite/20_util/uses_allocator/construction.cc: Likewise.

diff --git a/libstdc++-v3/include/bits/uses_allocator.h b/libstdc++-v3/include/bits/uses_allocator.h
index f9ea7d6..293a315 100644
--- a/libstdc++-v3/include/bits/uses_allocator.h
+++ b/libstdc++-v3/include/bits/uses_allocator.h
@@ -38,6 +38,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// [allocator.tag]
   struct allocator_arg_t { };
 
+  /// A tag of type allocator_arg_t.
   constexpr allocator_arg_t allocator_arg = allocator_arg_t();
 
   templatetypename _Tp, typename _Alloc, typename = __void_t
@@ -58,14 +59,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   struct __uses_alloc_base { };
 
+  /// A tag type indicating construction without an allocator.
   struct __uses_alloc0 : __uses_alloc_base
   {
 struct _Sink { void operator=(const void*) { } } _M_a;
   };
 
+  /// A tag type indicating construction with allocator_arg_t.
   templatetypename _Alloc
 struct __uses_alloc1 : __uses_alloc_base { const _Alloc* _M_a; };
 
+  /// A tag type indicating construction with an allocator argument at the end.
   templatetypename _Alloc
 struct __uses_alloc2 : __uses_alloc_base { const _Alloc* _M_a; };
 
@@ -84,10 +88,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __uses_allocfalse, _Tp, _Alloc, _Args...
 : __uses_alloc0 { };
 
+  /// A tag type indicating whether/how to construct with an allocator.
   templatetypename _Tp, typename _Alloc, typename... _Args
 using __uses_alloc_t =
   __uses_allocuses_allocator_Tp, _Alloc::value, _Tp, _Alloc, _Args...;
 
+  /// Make a tag type indicating how to use an allocator for construction.
   templatetypename _Tp, typename _Alloc, typename... _Args
 inline __uses_alloc_t_Tp, _Alloc, _Args...
 __use_alloc(const _Alloc __a)
@@ -97,6 +103,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __ret;
 }
 
+  /// Check for required Allocator functions (only as an unevaluated operand).
+  templatetypename _Alloc
+auto
+__can_allocate(_Alloc* __a)
+- decltype(__a-deallocate(__a-allocate(1u), 1u));
+
+  /// Primary template handles all cases that don't look like Allocators.
+  templatetypename _Alloc, typename = __void_t
+struct __is_allocator_impl
+: false_type { };
+
+  /// Specialization recognizes types that define value_type and can allocate.
+  templatetypename _Alloc
+struct __is_allocator_impl_Alloc,
+			   __void_ttypename _Alloc::value_type,
+decltype(__can_allocate_Alloc(0))
+: true_type { };
+
+  /// Detect whether a type might be an Allocator.
+  templatetypename _Alloc
+struct __is_allocator : __is_allocator_impl_Alloc::type
+{ };
+
+  /// Alias for allocator_arg_t that is only valid if _Alloc is an Allocator.
+  templatetypename _Alloc
+using __alloc_arg_t = typename enable_if__is_allocator_Alloc::value,
+	 allocator_arg_t::type;
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace std
 
diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index 59b992a..a765bc8 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -640,7 +640,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // Allocator-extended constructors.
 
   templatetypename _Alloc
-	tuple(allocator_arg_t __tag, const _Alloc __a)
+	tuple(__alloc_arg_t_Alloc __tag, const _Alloc __a)
 	: _Inherited(__tag, __a) { 

RE: [PATCH] Discard Scops for which entry==exit

2015-06-30 Thread Aditya K
Hi Tobias,
A test case (gcc/testsuite/gcc.dg/graphite/pr18792.c) came up when we removed 
`graphite-scop-detection.c:limit_scops'.
The test case is a scop where entry==exit,

BB5 (*#) -> BB6 (#);
BB6 -> BB5;

In this case BB2 is out of the scop. This is basically an empty (infinite) loop 
with no entry.

--

(gdb) p debug_loops(3)
loop_0 (header = 0, latch = 1, niter = )
{
  bb_2 (preds = {bb_0 }, succs = {bb_8 bb_3 })
  {
    bb 2:
    # VUSE .MEM_7(D)
    _5 = *x_10(D)[3];
    if (_5  0.0)
      goto bb 8;
    else
      goto bb 3;

  }
  bb_3 (preds = {bb_2 }, succs = {bb_4 bb_7 })
  {
    bb 3:
    if (_5= 0.0)
      goto bb 4;
    else
      goto bb 7;

  }
  bb_4 (preds = {bb_3 bb_8 }, succs = {bb_9 })
  {
    bb 4:
    # .MEM_19 = PHI .MEM_7(D)(3), .MEM_14(8)

  }
  bb_9 (preds = {bb_4 }, succs = {bb_5 })
  {
    bb 9:

  }
  bb_7 (preds = {bb_3 }, succs = {bb_1 })
  {
    bb 7:
    # VUSE .MEM_7(D)
    return;

  }
  bb_8 (preds = {bb_2 }, succs = {bb_4 })
  {
    bb 8:
    # .MEM_14 = VDEF .MEM_7(D)
    *x_10(D)[3] = 0.0;
    goto bb 4;

  }
  loop_2 (header = 5, latch = 6, niter = scev_not_known)
  {
    bb_5 (preds = {bb_9 bb_6 }, succs = {bb_6 })
    {
      bb 5:
      # .MEM_25 = PHI .MEM_19(9), .MEM_25(6)

    }
    bb_6 (preds = {bb_5 }, succs = {bb_5 })
    {
      bb 6:
      goto bb 5;

    }
  }
}



digraph all {
0 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 0 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
2 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 2 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
3 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 3 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
4 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 4 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
9 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 9 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
5 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#e41a1c 5*# /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
6 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#e41a1c 6 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
7 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 7 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
8 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 8 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
1 [label=
  TABLE BORDER=0 CELLBORDER=1 CELLSPACING=0
    TRTD WIDTH=50 BGCOLOR=#ff 1 /TD/TR
  /TABLE, shape=box, style=setlinewidth(0)]
0 - 2;
2 - 8;
2 - 3;
3 - 4;
3 - 7;
4 - 9;
9 - 5;
5 - 6;
6 - 5;
7 - 1;
8 - 4;
}


-Aditya


 Date: Tue, 30 Jun 2015 08:11:01 +0200
 From: tob...@grosser.es
 To: seb...@gmail.com; hiradi...@msn.com
 CC: gcc-patches@gcc.gnu.org
 Subject: Re: [PATCH] Discard Scops for which entry==exit

 On 06/30/2015 02:09 AM, Sebastian Pop wrote:
 On Mon, Jun 29, 2015 at 3:04 PM, Aditya Kumar hiradi...@msn.com wrote:
 In this patch we discard the scops where entry and exit are the same BB.
 This is an effort to remove graphite-scop-detection.c:limit_scops.
 Removing the limit_scops function introduces correctness regressions.
 We are making relevant changes in incremental steps to fix those bugs,
 and finally we intend to remove limit_scops.

 2015-06-29 Aditya Kumar aditya...@samsung.com
 Sebastian Pop s@samsung.com

 * graphite-scop-detection.c (build_scops_1): Discard scops for which 
 entry==exit

 Looks good to me.
 Let's wait on comments from Tobi before pushing this patch.

 Hi Sebastian,

 the commit message should probably give a short reasoning why scops with
 entry == exit need to be discarded. I currently don't see why they would
 be incorrect/problematic (despite being possibly very small/empty).

 Tobias
  

Re: [patch] Constrain allocator_arg_t to only work with valid Allocators

2015-06-30 Thread Jonathan Wakely

On 30/06/15 15:49 +0100, Jonathan Wakely wrote:

I'm also playing with another change to make allocator_traits<A>
SFINAE-friendly, by only defining the nested allocator_type member
when __is_allocator<A> is true. If it works I think that might be
worth standardising.


Something like this, although with the __alloc_arg_t helper I don't
have an immediate use for a SFINAE-friendly allocator_traits.

commit 79ebd1b450c77c592ec8adabbad162a4e9d7bc51
Author: Jonathan Wakely jwak...@redhat.com
Date:   Tue Jun 30 11:22:23 2015 +0100

define allocator_traits::allocator_type conditionally

diff --git a/libstdc++-v3/include/bits/alloc_traits.h b/libstdc++-v3/include/bits/alloc_traits.h
index bb98c1d..25ccd97 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -34,6 +34,7 @@
 
 #include bits/memoryfwd.h
 #include bits/ptr_traits.h
+#include bits/uses_allocator.h
 #include ext/numeric_traits.h
 
 namespace std _GLIBCXX_VISIBILITY(default)
@@ -75,15 +76,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   templatetypename _Alloc, typename _Tp
 using __alloc_rebind = typename __alloctr_rebind_Alloc, _Tp::__type;
 
+  templatetypename _Alloc, bool = __is_allocator_Alloc::value
+struct __allocator_traits_base
+{
+  /// The allocator type
+  typedef _Alloc allocator_type;
+};
+
+  templatetypename _Alloc
+struct __allocator_traits_base_Alloc, false
+{ };
+
   /**
* @brief  Uniform interface to all allocator types.
* @ingroup allocators
   */
   templatetypename _Alloc
-struct allocator_traits
+struct allocator_traits : __allocator_traits_base_Alloc
 {
-  /// The allocator type
-  typedef _Alloc allocator_type;
+  // The allocator_type typedef is conditionally defined in the base class.
+
   /// The allocated type
   typedef typename _Alloc::value_type value_type;
 


Re: [PATCH/RFC] Make loop-header-copying more aggressive, rerun before tree-if-conversion

2015-06-30 Thread Alan Lawrence

Jeff Law wrote:
Thanks.  Does running the phi-only propagator after the loop header 
copying help?  At first glance it would seem that it ought to propagate 
the values of those degenerate PHIs then eliminate those PHIs.


It was written to cleanup after jump threading which has a tendency to 
create very similar code to what you've shown below and to do so very 
quickly.


Thanks for the tip - this fixes up some examples, but not all. Other examples
require also a call to rewrite_into_loop_closed_ssa and recomputing 
dominators...maybe I can get everything to work with all of those, but my 
feeling is to keep it as a pass: if the first pass_ch justifies being a pass in 
its own right, then surely a *more aggressive* version of that, does too...




Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2015-06-30 Thread Jakub Jelinek
On Tue, Jun 30, 2015 at 06:42:01PM +0300, Ilya Verbin wrote:
 On Tue, Jun 30, 2015 at 14:57:02 +0200, Jakub Jelinek wrote:
  On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
   --- a/libgomp/target.c
   +++ b/libgomp/target.c
   @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
   do_copyfrom)
  bool do_unmap = false;
  if (k-refcount  1)
 k-refcount--;
   -  else if (k-async_refcount  0)
   - k-async_refcount--;
   -  else
   - do_unmap = true;
   +  else if (k-refcount == 1)
   + {
   +   if (k-async_refcount  0)
   + k-async_refcount--;
   +   else
   + {
   +   k-refcount--;
   +   do_unmap = true;
   + }
   + }
  
  What is the rationale of this hunk change?
 
 Without whis change, when k-refcount == 1, do_unmap is true, but refcount is
 not decremented.  So, if gomp_unmap_vars is called multiple times (now it's
 possible for 4.1), refcount will remain 1, and it will try to unmap k at each
 next call, that is wrong.  That's why I decrement refcount to zero, and do
 nothing when hit gomp_unmap_vars next time with k-refcount == 0.

Ok.

  if (is_enter_data)
   -{
   -  /* TODO  */
   -}
   +gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, 
   false);
  
  This will leak the return value.  Either we need to arrange not to allocate
  it for enter data, or we need to assign it to some variable and free
  immediately (we don't want to perform the release operations for it).
 
 But we can't not allocate or free immediately it, since it's used later 
 through
 splay_tree_key_s::tgt, e.g. here:
 
   if (is_target)
 {
   for (i = 0; i  mapnum; i++)
   {
 if (tgt-list[i].key == NULL)
   cur_node.tgt_offset = (uintptr_t) NULL;
 else
   cur_node.tgt_offset = tgt-list[i].key-tgt-tgt_start
 + tgt-list[i].key-tgt_offset;

The thing is whether it is actually a good idea to allocate the enter data
allocated objects together.
In OpenMP 4.0, generally objects would be allocated and deallocated at the
same times, except for multiple host threads trying to map the same variables
into the target.  In OpenMP 4.1, due to enter data/exit data, they can be
allocated and freed quite independently, and it is true that is the case
even for target data, one can either target data, then target enter data
to prevent something from being deallocated, then target data end freeing
only parts, etc.  So the question is whether we think that, in real-world code,
allocation and deallocation will usually happen together or not.

Jakub


[gomp] openacc reduction cleanup

2015-06-30 Thread Cesar Philippidis
This patch removes a lot of the crufty code which was necessary for
arbitrary-sized vector reductions. The new plan going forward is to fix
vector_length to a size such that vector loops don't require any
synchronization after the loop has terminated. In the case of nvptx
targets, vector_length = warp_sz, which is currently 32 threads. I'll
follow up this patch with another one which adds support for tree
reductions in vector loops at a later date.

This patch has been applied to gomp-4_0-branch.

Cesar


[match.pd, committed] Improve X - (X / Y) * Y

2015-06-30 Thread Marek Polacek
This patch improves the X - (X / Y) * Y simplification a bit; by
using convert[12]? we're also able to fold INTEGER_CSTs with different
types.

Besides using convert[12]?, I also had to convert both operands of the
resulting expression to keep the gimple verifier happy.

Bootstrapped/regtested on x86_64-linux, applying to trunk.

2015-06-30  Marek Polacek  pola...@redhat.com

* match.pd (X - (X / Y) * Y): Use convert1 and convert2.  Convert
both operands of the resulting expression.  

* gcc.dg/fold-minus-6.c: New test.

diff --git gcc/match.pd gcc/match.pd
index 682784b..e6728f4 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -240,9 +240,9 @@ along with GCC; see the file COPYING3.  If not see
 
 /* X - (X / Y) * Y is the same as X % Y.  */
 (simplify
- (minus (convert? @0) (convert? (mult (trunc_div @0 @1) @1)))
+ (minus (convert1? @0) (convert2? (mult (trunc_div @0 @1) @1)))
  (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-  (convert (trunc_mod @0 @1
+  (trunc_mod (convert @0) (convert @1
 
 /* Optimize TRUNC_MOD_EXPR by a power of two into a BIT_AND_EXPR,
i.e. X % C into X  (C - 1), if X and C are positive.
diff --git gcc/testsuite/gcc.dg/fold-minus-6.c 
gcc/testsuite/gcc.dg/fold-minus-6.c
index e69de29..1c22c25 100644
--- gcc/testsuite/gcc.dg/fold-minus-6.c
+++ gcc/testsuite/gcc.dg/fold-minus-6.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-cddce1 } */
+
+int
+fn1 (int x)
+{
+  return 42L - (42 / x) * x;
+}
+
+long
+fn2 (int x)
+{
+  return 42L - (42 / x) * x;
+}
+
+int
+fn3 (long int x)
+{
+  return 42L - (42 / x) * x;
+}
+
+int
+fn4 (int a, int b)
+{
+  return a - (unsigned) ((a / b) * b);
+}
+
+int
+fn5 (int a, unsigned int b)
+{
+  return a - ((a / b) * b);
+}
+
+unsigned int
+fn6 (int a, int b)
+{
+  return a - ((a / b) * b);
+}
+
+/* { dg-final { scan-tree-dump-not  /  cddce1 } } */
+/* { dg-final { scan-tree-dump-not  -  cddce1 } } */
+/* { dg-final { scan-tree-dump-not  \\+  cddce1 } } */

Marek


[match.pd, committed] Tweak ~x | x

2015-06-30 Thread Marek Polacek
As discussed here: https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02223.html.

Bootstrapped/regtested on x86_64-linux, applying to trunk.

2015-06-30  Marek Polacek  pola...@redhat.com

* match.pd (~x | x): Don't use tree_nop_conversion_p.  Build
the final expression with the operand's type and then convert
it to the type of the expression.

* gcc.dg/fold-ior-3.c: New test.

diff --git gcc/match.pd gcc/match.pd
index 682784b..adb7a52 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -286,8 +286,7 @@ along with GCC; see the file COPYING3.  If not see
 /* ~x | x - -1 */
 (simplify
  (bit_ior:c (convert? @0) (convert? (bit_not @0)))
- (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
-  { build_all_ones_cst (type); }))
+ (convert { build_all_ones_cst (TREE_TYPE (@0)); }))
 
 /* x ^ x - 0 */
 (simplify
diff --git gcc/testsuite/gcc.dg/fold-ior-3.c gcc/testsuite/gcc.dg/fold-ior-3.c
index e69de29..ed89ff9 100644
--- gcc/testsuite/gcc.dg/fold-ior-3.c
+++ gcc/testsuite/gcc.dg/fold-ior-3.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-cddce1 } */
+
+int
+fn1 (_Bool a)
+{
+  return ((int) a) | ((int) ~a);
+}
+
+int
+fn2 (unsigned char a)
+{
+  return ((int) a) | ((int) ~a);
+}
+
+int
+fn3 (unsigned short a)
+{
+  return ((int) a) | ((int) ~a);
+}
+
+int
+fn4 (signed char a)
+{
+  return ((int) a) | ((int) ~a);
+}
+
+int
+fn5 (signed short a)
+{
+  return ((int) a) | ((int) ~a);
+}
+
+/* { dg-final { scan-tree-dump-not ~ cddce1 } } */
+/* { dg-final { scan-tree-dump-not  \\|  cddce1 } } */

Marek


patch to fix PR66691

2015-06-30 Thread Vladimir Makarov

The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66691

The patch was tested and bootstrapped on x86/x86-64.

Committed as rev. 225200.

Index: ChangeLog
===
--- ChangeLog	(revision 225194)
+++ ChangeLog	(working copy)
@@ -1,3 +1,19 @@
+2015-06-30  Vladimir Makarov  vmaka...@redhat.com
+
+	PR debug/66691
+	* lra-int.h (lra_substitute_pseudo): Add a parameter.
+	(lra_substitute_pseudo_within_insn): Ditto.
+	* lra.c (lra_substitute_pseudo): Add a parameter.  Simplify subreg
+	of constant.
+	(lra_substitute_pseudo_within_insn): Add a parameter.  Transfer it
+	to lra_substitute_pseudo.
+	* lra-lives.c (process_bb_lives): Add an argument to
+	lra_substitute_pseudo_within_insn call.
+	* lra-constraints.c (inherit_reload_reg, split_reg): Add an
+	argument to lra_substitute_pseudo and
+	lra_substitute_pseudo_within_insn calls.
+	(remove_inheritance_pseudos, undo_optional_reloads): Ditto.
+
 2015-06-30  Richard Biener  rguent...@suse.de
 
 	* fold-const.c (fold_binary_loc): Move ~x  ~y - ~(x | y) and
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog	(revision 225194)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2015-06-30  Vladimir Makarov  vmaka...@redhat.com
+
+	PR debug/66691
+	* gcc.target/i386/pr66691.c: New.
+
 2015-06-30  Edward Smith-Rowland  3dw...@verizon.net
 
 	Implement N4197 - Adding u8 character literals
Index: lra.c
===
--- lra.c	(revision 225134)
+++ lra.c	(working copy)
@@ -1777,9 +1777,10 @@ lra_process_new_insns (rtx_insn *insn, r
 
 
 /* Replace all references to register OLD_REGNO in *LOC with pseudo
-   register NEW_REG.  Return true if any change was made.  */
+   register NEW_REG.  Try to simplify subreg of constant if SUBREG_P.
+   Return true if any change was made.  */
 bool
-lra_substitute_pseudo (rtx *loc, int old_regno, rtx new_reg)
+lra_substitute_pseudo (rtx *loc, int old_regno, rtx new_reg, bool subreg_p)
 {
   rtx x = *loc;
   bool result = false;
@@ -1791,9 +1792,25 @@ lra_substitute_pseudo (rtx *loc, int old
 return false;
 
   code = GET_CODE (x);
-  if (code == REG  (int) REGNO (x) == old_regno)
+  if (code == SUBREG  subreg_p)
 {
-  machine_mode mode = GET_MODE (*loc);
+  rtx subst, inner = SUBREG_REG (x);
+  /* Transform subreg of constant while we still have inner mode
+	 of the subreg.  The subreg internal should not be an insn
+	 operand.  */
+  if (REG_P (inner)  (int) REGNO (inner) == old_regno
+	   CONSTANT_P (new_reg)
+	   (subst = simplify_subreg (GET_MODE (x), new_reg, GET_MODE (inner),
+   SUBREG_BYTE (x))) != NULL_RTX)
+	{
+	  *loc = subst;
+	  return true;
+	}
+  
+}
+  else if (code == REG  (int) REGNO (x) == old_regno)
+{
+  machine_mode mode = GET_MODE (x);
   machine_mode inner_mode = GET_MODE (new_reg);
 
   if (mode != inner_mode
@@ -1815,26 +1832,30 @@ lra_substitute_pseudo (rtx *loc, int old
 {
   if (fmt[i] == 'e')
 	{
-	  if (lra_substitute_pseudo (XEXP (x, i), old_regno, new_reg))
+	  if (lra_substitute_pseudo (XEXP (x, i), old_regno,
+ new_reg, subreg_p))
 	result = true;
 	}
   else if (fmt[i] == 'E')
 	{
 	  for (j = XVECLEN (x, i) - 1; j = 0; j--)
-	if (lra_substitute_pseudo (XVECEXP (x, i, j), old_regno, new_reg))
+	if (lra_substitute_pseudo (XVECEXP (x, i, j), old_regno,
+   new_reg, subreg_p))
 	  result = true;
 	}
 }
   return result;
 }
 
-/* Call lra_substitute_pseudo within an insn.  This won't update the insn ptr,
-   just the contents of the insn.  */
+/* Call lra_substitute_pseudo within an insn.  Try to simplify subreg
+   of constant if SUBREG_P.  This won't update the insn ptr, just the
+   contents of the insn.  */
 bool
-lra_substitute_pseudo_within_insn (rtx_insn *insn, int old_regno, rtx new_reg)
+lra_substitute_pseudo_within_insn (rtx_insn *insn, int old_regno,
+   rtx new_reg, bool subreg_p)
 {
   rtx loc = insn;
-  return lra_substitute_pseudo (loc, old_regno, new_reg);
+  return lra_substitute_pseudo (loc, old_regno, new_reg, subreg_p);
 }
 
 
Index: lra-constraints.c
===
--- lra-constraints.c	(revision 225134)
+++ lra-constraints.c	(working copy)
@@ -4711,7 +4711,7 @@ inherit_reload_reg (bool def_p, int orig
 	}
   return false;
 }
-  lra_substitute_pseudo_within_insn (insn, original_regno, new_reg);
+  lra_substitute_pseudo_within_insn (insn, original_regno, new_reg, false);
   lra_update_insn_regno_info (insn);
   if (! def_p)
 /* We now have a new usage insn for original regno.  */
@@ -4743,7 +4743,7 @@ inherit_reload_reg (bool def_p, int orig
 	  lra_assert (DEBUG_INSN_P (usage_insn));
 	  next_usage_insns = XEXP (next_usage_insns, 1);
 	}
-  lra_substitute_pseudo (usage_insn, original_regno, new_reg);
+

Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Martin Sebor

In the debian reproducible builds project we have considered several
options to address this issue. We considered redefining the __DATE__ and
__TIME__ defines by command line flags passed to gcc, but as you say,
that triggers warnings, which could become errors when building with
-Werror and thus may require manual intervention on many packages.


Would replacing the localtime function with one of your own
in a DSO and preloading the DSO when invoking GCC be a viable
solution? E.g., like so:

$ cat time.c && gcc -Wall -fpic -o libtime.so -shared time.c && echo \
"__DATE__ __TIME__" | LD_PRELOAD=./libtime.so gcc -E -xc -

#include <time.h>
static struct tm t;
struct tm *localtime (const time_t *timer) { return &t; }
# 1 "<stdin>"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 1 "<command-line>" 2
# 1 "<stdin>"
"Jan  0 1900" "00:00:00"

Martin


Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Mike Stump
On Jun 30, 2015, at 10:38 AM, Martin Sebor mse...@gmail.com wrote:
 In the debian reproducible builds project we have considered several
 options to address this issue. We considered redefining the __DATE__ and
 __TIME__ defines by command line flags passed to gcc, but as you say,
 that triggers warnings, which could become errors when building with
 -Werror and thus may require manual intervention on many packages.
 
 Would replacing the localtime function with one of your own
 in a DSO and preloading the DSO when invoking GCC be a viable
 solution?

No please.  Not all systems have shared libraries, preloading and so on.  
Indeed, I like building on my target system using the simulator, no shared 
libraries around.

[C++ Patch] PR 60365

2015-06-30 Thread Paolo Carlini

Hi,

this issue is an accepts-invalid bug: multiple noreturn attributes in a
single attribute-list are not rejected. The same issue exists for
deprecated in C++14 (and will exist for carries_dependency once we
implement it...). Of course it would be easy to imagine more efficient
algorithms that do the check only once, after the entire attribute-list
has been parsed, but the simple-minded check has the advantage that the
locations are correct for free, and I suppose it should be good enough
efficiency-wise for reasonable attribute-lists... Tested x86_64-linux.


Thanks,
Paolo.

//
/cp
2015-06-30  Paolo Carlini  paolo.carl...@oracle.com

PR c++/60365
* parser.c (cp_parser_check_std_attribute): New.
(cp_parser_std_attribute_list): Call it.

/testsuite
2015-06-30  Paolo Carlini  paolo.carl...@oracle.com

PR c++/60365
* g++.dg/cpp0x/gen-attrs-60.C: New.
* g++.dg/cpp1y/attr-deprecated-2.C: Likewise.
Index: cp/parser.c
===
--- cp/parser.c (revision 225177)
+++ cp/parser.c (working copy)
@@ -22532,6 +22547,35 @@ cp_parser_std_attribute (cp_parser *parser)
   return attribute;
 }
 
+/* Check that the attribute ATTRIBUTE appears at most once in the
+   attribute-list ATTRIBUTES.  This is enforced for noreturn (7.6.3)
+   and deprecated (7.6.5).  Note that carries_dependency (7.6.4)
+   isn't implemented yet in GCC.  */
+
+static bool
+cp_parser_check_std_attribute (tree attributes, tree attribute)
+{
+  if (attributes)
+{
+  tree name = get_attribute_name (attribute);
+  if (is_attribute_p (noreturn, name)
+  lookup_attribute (noreturn, attributes))
+   {
+ error (attribute noreturn can appear at most once 
+in an attribute-list);
+ return false;
+   }
+  else if (is_attribute_p (deprecated, name)
+   lookup_attribute (deprecated, attributes))
+   {
+ error (attribute deprecated can appear at most once 
+in an attribute-list);
+ return false;
+   }
+}
+  return true;
+}
+
 /* Parse a list of standard C++-11 attributes.
 
attribute-list:
@@ -22554,6 +22598,8 @@ cp_parser_std_attribute_list (cp_parser *parser)
break;
   if (attribute != NULL_TREE)
{
+ if (!cp_parser_check_std_attribute (attributes, attribute))
+   break;
  TREE_CHAIN (attribute) = attributes;
  attributes = attribute;
}
Index: testsuite/g++.dg/cpp0x/gen-attrs-60.C
===
--- testsuite/g++.dg/cpp0x/gen-attrs-60.C   (revision 0)
+++ testsuite/g++.dg/cpp0x/gen-attrs-60.C   (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++11 } }
+
+void func [[noreturn, noreturn]] (); // { dg-error at most once }
Index: testsuite/g++.dg/cpp1y/attr-deprecated-2.C
===
--- testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (revision 0)
+++ testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++14 } }
+
+void func [[deprecated, deprecated]] (); // { dg-error at most once }


Re: [C++ Patch] PR 60365

2015-06-30 Thread Jason Merrill

OK.

Jason


Re: [PATCH] Allow embedded timestamps by C/C++ macros to be set externally

2015-06-30 Thread Manuel López-Ibáñez
On 30 June 2015 at 17:18, Dhole dh...@openmailbox.org wrote:
 In the debian reproducible builds project we have considered several
 options to address this issue. We considered redefining the __DATE__ and
 __TIME__ defines by command line flags passed to gcc, but as you say,
 that triggers warnings, which could become errors when building with
 -Werror and thus may require manual intervention on many packages.

Well, it would require adding -Wno-something (-Wno-reproducible?
-Wno-unreproducible? or perhaps simply -freproducible? ) to some
CFLAGS/CXXFLAGS. Is that too much manual intervention? (I'm asking
sincerely, perhaps indeed it is).

This could be a big-hammer option that simply disables any warning
that is not relevant for reproducible builds (the default being
-Wsomething), for example by not emitting -Wbuiltin-macro-redefined
warnings in the specific cases of __TIME__ and __DATE__. Just an idea;
the maintainers would need to say whether they would accept such an option.

Cheers,

Manuel.


Re: [C++ Patch] PR 60365

2015-06-30 Thread Paolo Carlini

Hi,

On 06/30/2015 08:31 PM, Jason Merrill wrote:

OK.
Thanks Jason. If you don't object, I propose to commit instead the 
simpler version below, which makes for better error-recovery in case of 
multiple errors.


Thanks again,
Paolo.

/


Index: cp/parser.c
===
--- cp/parser.c (revision 225177)
+++ cp/parser.c (working copy)
@@ -22532,6 +22547,35 @@ cp_parser_std_attribute (cp_parser *parser)
   return attribute;
 }
 
+/* Check that the attribute ATTRIBUTE appears at most once in the
+   attribute-list ATTRIBUTES.  This is enforced for noreturn (7.6.3)
+   and deprecated (7.6.5).  Note that carries_dependency (7.6.4)
+   isn't implemented yet in GCC.  */
+
+static bool
+cp_parser_check_std_attribute (tree attributes, tree attribute)
+{
+  if (attributes)
+{
+  tree name = get_attribute_name (attribute);
+  if (is_attribute_p (noreturn, name)
+  lookup_attribute (noreturn, attributes))
+   {
+ error (attribute noreturn can appear at most once 
+in an attribute-list);
+ return false;
+   }
+  else if (is_attribute_p (deprecated, name)
+   lookup_attribute (deprecated, attributes))
+   {
+ error (attribute deprecated can appear at most once 
+in an attribute-list);
+ return false;
+   }
+}
+  return true;
+}
+
 /* Parse a list of standard C++-11 attributes.
 
attribute-list:
@@ -22554,6 +22598,8 @@ cp_parser_std_attribute_list (cp_parser *parser)
break;
   if (attribute != NULL_TREE)
{
+ if (!cp_parser_check_std_attribute (attributes, attribute))
+   break;
  TREE_CHAIN (attribute) = attributes;
  attributes = attribute;
}
Index: testsuite/g++.dg/cpp0x/gen-attrs-60.C
===
--- testsuite/g++.dg/cpp0x/gen-attrs-60.C   (revision 0)
+++ testsuite/g++.dg/cpp0x/gen-attrs-60.C   (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++11 } }
+
+void func [[noreturn, noreturn]] (); // { dg-error at most once }
Index: testsuite/g++.dg/cpp1y/attr-deprecated-2.C
===
--- testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (revision 0)
+++ testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++14 } }
+
+void func [[deprecated, deprecated]] (); // { dg-error at most once }


max_load_factor constant complexity

2015-06-30 Thread François Dumont
Hi

    During a recent discussion on the Reflector about max_load_factor, some
pointed out that libstdc++ does not have the constant complexity imposed
by the Standard in Table 103, because we try to respect the new factor by
potentially rehashing the container. This patch fixes this problem by
adopting the VS Standard Library behavior of retaining the targeted
max_load_factor and complying with it as soon as possible on insertion.

* include/bits/hashtable.h (_Hashtable::__rehash_policy): Remove
container rehash.
* testsuite/23_containers/unordered_set/max_load_factor/robustness.cc:
Adapt.

Tested under linux x86_64.

Ok to commit ?

François



diff --git libstdc++-v3/include/bits/hashtable.h libstdc++-v3/include/bits/hashtable.h
index 31d237e..19d7ee7 100644
--- libstdc++-v3/include/bits/hashtable.h
+++ libstdc++-v3/include/bits/hashtable.h
@@ -595,7 +595,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return _M_rehash_policy; }
 
   void
-  __rehash_policy(const _RehashPolicy);
+  __rehash_policy(const _RehashPolicy __pol)
+  { _M_rehash_policy = __pol; }
 
   // Lookup.
   iterator
@@ -1285,22 +1286,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _Alloc, typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
 	   typename _Traits
-void
-_Hashtable_Key, _Value, _Alloc, _ExtractKey, _Equal,
-	   _H1, _H2, _Hash, _RehashPolicy, _Traits::
-__rehash_policy(const _RehashPolicy __pol)
-{
-  auto __do_rehash =
-	__pol._M_need_rehash(_M_bucket_count, _M_element_count, 0);
-  if (__do_rehash.first)
-	_M_rehash(__do_rehash.second, _M_rehash_policy._M_state());
-  _M_rehash_policy = __pol;
-}
-
-  templatetypename _Key, typename _Value,
-	   typename _Alloc, typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
-	   typename _Traits
 auto
 _Hashtable_Key, _Value, _Alloc, _ExtractKey, _Equal,
 	   _H1, _H2, _Hash, _RehashPolicy, _Traits::
diff --git libstdc++-v3/testsuite/23_containers/unordered_set/max_load_factor/robustness.cc libstdc++-v3/testsuite/23_containers/unordered_set/max_load_factor/robustness.cc
index a72829e..5978228 100644
--- libstdc++-v3/testsuite/23_containers/unordered_set/max_load_factor/robustness.cc
+++ libstdc++-v3/testsuite/23_containers/unordered_set/max_load_factor/robustness.cc
@@ -32,41 +32,47 @@ void test01()
   int val = 0;
   for (; val != 100; ++val)
 {
-  VERIFY( us.insert(val).second) ;
+  VERIFY( us.insert(val).second );
   VERIFY( us.load_factor() = us.max_load_factor() );
 }
 
   float cur_max_load_factor = us.max_load_factor();
   int counter = 0;
   std::size_t thrown_exceptions = 0;
+
+  // Reduce max load factor.
+  us.max_load_factor(us.max_load_factor() / 2);
+
+  // At this point load factor is higher than max_load_factor because we can't
+  // rehash in max_load_factor call.
+  VERIFY( us.load_factor()  us.max_load_factor() );
+
   while (true)
 {
   __gnu_cxx::limit_condition::set_limit(counter++);
   bool do_break = false;
   try
 	{
-	  us.max_load_factor(.5f);
+	  size_t nbkts = us.bucket_count();
+	  // Check that unordered_set will still be correctly resized when
+	  // needed.
+	  VERIFY( us.insert(val++).second );
+
+	  VERIFY( us.bucket_count() != nbkts );
+	  VERIFY( us.load_factor() = us.max_load_factor() );
 	  do_break = true;
 	}
   catch (const __gnu_cxx::forced_error)
 	{
-	  VERIFY( us.max_load_factor() == cur_max_load_factor );
+	  // max load factor doesn't change.
+	  VERIFY( us.max_load_factor() == .5f );
 	  ++thrown_exceptions;
 	}
-  // Lets check that unordered_set will still be correctly resized
-  // when needed
-  __gnu_cxx::limit_condition::set_limit(nl_size_t::max());
-  for (;;)
-	{
-	  VERIFY( us.load_factor() = us.max_load_factor() );
-	  size_t nbkts = us.bucket_count();
-	  VERIFY( us.insert(val++).second );
-	  if (us.bucket_count() != nbkts)
-	break;
-	}
+
   if (do_break)
 	break;
 }
+
   VERIFY( thrown_exceptions  0 );
 }
 



RFA: Use target-insns.def for casesi and tablejump

2015-06-30 Thread Richard Sandiford
This patch uses target-insns.def to define casesi and tablejump.
It's the first point in the series at which we need access to the
instruction code, so the patch adds that to targetm as well.

Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
Also tested via config-list.mk.  OK to install?

Thanks,
Richard


gcc/
* target.def: Add code_for_* hooks.
* gentarget-def.c (def_target_insn): Add TARGET_CODE_FOR_* macros.
* defaults.h (HAVE_tablejump, gen_tablejump): Delete.
* target-insns.def (casesi, tablejump): New targetm instruction
patterns.
* expr.c (try_casesi): Use them instead of HAVE_*/gen_* interface.
(do_tablejump): Likewise.
* stmt.c (expand_switch_as_decision_tree_p): Likewise.
(expand_sjlj_dispatch_table): Likewise.
* targhooks.c (default_case_values_threshold): Likewise.

Index: gcc/target.def
===
--- gcc/target.def  2015-06-30 21:57:42.678281095 +0100
+++ gcc/target.def  2015-06-30 21:57:42.674281140 +0100
@@ -5877,6 +5877,11 @@ HOOK_VECTOR_END (mode_switching)
 #include target-insns.def
 #undef DEF_TARGET_INSN
 
+#define DEF_TARGET_INSN(NAME, PROTO) \
+  DEFHOOKPOD (code_for_##NAME, *, enum insn_code, CODE_FOR_nothing)
+#include target-insns.def
+#undef DEF_TARGET_INSN
+
 /* Close the 'struct gcc_target' definition.  */
 HOOK_VECTOR_END (C90_EMPTY_HACK)
 
Index: gcc/gentarget-def.c
===
--- gcc/gentarget-def.c 2015-06-30 21:57:42.678281095 +0100
+++ gcc/gentarget-def.c 2015-06-30 21:57:42.674281140 +0100
@@ -189,6 +189,13 @@ def_target_insn (const char *name, const
 printf (invalid_%s\n, suffix);
   else
 printf (target_gen_%s\n, name);
+
+  printf (#undef TARGET_CODE_FOR_%s\n, upper_name);
+  printf (#define TARGET_CODE_FOR_%s , upper_name);
+  if (truth == 0)
+printf (CODE_FOR_nothing\n);
+  else
+printf (CODE_FOR_%s\n, name);
 }
 
 int
Index: gcc/defaults.h
===
--- gcc/defaults.h  2015-06-30 21:57:42.678281095 +0100
+++ gcc/defaults.h  2015-06-30 21:57:42.670281186 +0100
@@ -1426,16 +1426,6 @@ #define STACK_CHECK_MAX_VAR_SIZE (STACK_
 #define TARGET_VTABLE_USES_DESCRIPTORS 0
 #endif
 
-#ifndef HAVE_tablejump
-#define HAVE_tablejump 0
-static inline rtx
-gen_tablejump (rtx, rtx)
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
 #endif /* GCC_INSN_FLAGS_H  */
 
 #endif  /* ! GCC_DEFAULTS_H */
Index: gcc/target-insns.def
===
--- gcc/target-insns.def2015-06-30 21:57:42.678281095 +0100
+++ gcc/target-insns.def2015-06-30 21:57:42.674281140 +0100
@@ -31,6 +31,7 @@
 
Instructions should be documented in md.texi rather than here.  */
 DEF_TARGET_INSN (canonicalize_funcptr_for_compare, (rtx x0, rtx x1))
+DEF_TARGET_INSN (casesi, (rtx x0, rtx x1, rtx x2, rtx x3, rtx x4))
 DEF_TARGET_INSN (epilogue, (void))
 DEF_TARGET_INSN (jump, (rtx x0))
 DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2))
@@ -42,3 +43,4 @@ DEF_TARGET_INSN (return, (void))
 DEF_TARGET_INSN (sibcall_epilogue, (void))
 DEF_TARGET_INSN (simple_return, (void))
 DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
Index: gcc/expr.c
===
--- gcc/expr.c  2015-06-30 21:57:42.678281095 +0100
+++ gcc/expr.c  2015-06-30 21:57:42.670281186 +0100
@@ -11058,14 +11058,6 @@ do_store_flag (sepops ops, rtx target, m
  !TYPE_UNSIGNED (ops-type)) ? -1 : 1);
 }
 
-
-/* Stubs in case we haven't got a casesi insn.  */
-#ifndef HAVE_casesi
-# define HAVE_casesi 0
-# define gen_casesi(a, b, c, d, e) (0)
-# define CODE_FOR_casesi CODE_FOR_nothing
-#endif
-
 /* Attempt to generate a casesi instruction.  Returns 1 if successful,
0 otherwise (i.e. if there is no casesi instruction).
 
@@ -11080,7 +11072,7 @@ try_casesi (tree index_type, tree index_
   machine_mode index_mode = SImode;
   rtx op1, op2, index;
 
-  if (! HAVE_casesi)
+  if (! targetm.have_casesi ())
 return 0;
 
   /* Convert the index to SImode.  */
@@ -11124,7 +6,7 @@ try_casesi (tree index_type, tree index_
   create_fixed_operand (ops[4], (default_label
  ? default_label
  : fallback_label));
-  expand_jump_insn (CODE_FOR_casesi, 5, ops);
+  expand_jump_insn (targetm.code_for_casesi, 5, ops);
   return 1;
 }
 
@@ -11197,7 +11189,7 @@ do_tablejump (rtx index, machine_mode mo
   vector = gen_const_mem (CASE_VECTOR_MODE, index);
   convert_move (temp, vector, 0);
 
-  emit_jump_insn (gen_tablejump (temp, table_label));
+  emit_jump_insn (targetm.gen_tablejump (temp, table_label));
 
   /* If we are generating PIC code or if the table is PC-relative, the
  

Re: [C++ Patch] PR 60365

2015-06-30 Thread Paolo Carlini

oops, I mean the below.

Paolo.


Index: cp/parser.c
===
--- cp/parser.c (revision 225177)
+++ cp/parser.c (working copy)
@@ -22532,6 +22547,28 @@ cp_parser_std_attribute (cp_parser *parser)
   return attribute;
 }
 
+/* Check that the attribute ATTRIBUTE appears at most once in the
+   attribute-list ATTRIBUTES.  This is enforced for noreturn (7.6.3)
+   and deprecated (7.6.5).  Note that carries_dependency (7.6.4)
+   isn't implemented yet in GCC.  */
+
+static void
+cp_parser_check_std_attribute (tree attributes, tree attribute)
+{
+  if (attributes)
+{
+  tree name = get_attribute_name (attribute);
+  if (is_attribute_p (noreturn, name)
+  lookup_attribute (noreturn, attributes))
+   error (attribute noreturn can appear at most once 
+  in an attribute-list);
+  else if (is_attribute_p (deprecated, name)
+   lookup_attribute (deprecated, attributes))
+   error (attribute deprecated can appear at most once 
+  in an attribute-list);
+}
+}
+
 /* Parse a list of standard C++-11 attributes.
 
attribute-list:
@@ -22554,6 +22591,7 @@ cp_parser_std_attribute_list (cp_parser *parser)
break;
   if (attribute != NULL_TREE)
{
+ cp_parser_check_std_attribute (attributes, attribute);
  TREE_CHAIN (attribute) = attributes;
  attributes = attribute;
}
Index: testsuite/g++.dg/cpp0x/gen-attrs-60.C
===
--- testsuite/g++.dg/cpp0x/gen-attrs-60.C   (revision 0)
+++ testsuite/g++.dg/cpp0x/gen-attrs-60.C   (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++11 } }
+
+void func [[noreturn, noreturn]] (); // { dg-error at most once }
Index: testsuite/g++.dg/cpp1y/attr-deprecated-2.C
===
--- testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (revision 0)
+++ testsuite/g++.dg/cpp1y/attr-deprecated-2.C  (working copy)
@@ -0,0 +1,4 @@
+// PR c++/60365
+// { dg-do compile { target c++14 } }
+
+void func [[deprecated, deprecated]] (); // { dg-error at most once }


[PATCH] Graphite cannot handle return stmt

2015-06-30 Thread Aditya Kumar
No regressions.

2015-06-29  Aditya Kumar  aditya...@samsung.com
Sebastian Pop s@samsung.com

* graphite-scop-detection.c (stmt_simple_for_scop_p): Bail out in case 
of a return statement.

---
 gcc/graphite-scop-detection.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index e8ddecd..a14142f 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -364,7 +364,6 @@ stmt_simple_for_scop_p (basic_block scop_entry, loop_p 
outermost_loop,
 
   switch (gimple_code (stmt))
 {
-case GIMPLE_RETURN:
 case GIMPLE_LABEL:
   return true;
 
-- 
2.1.0.243.g30d45f7



[PATCH, committed] fixes to gcc_jit_context_dump_reproducer_to_file

2015-06-30 Thread David Malcolm
Tested with make check-jit and visual inspection; jit.sum continues
to have 8014 passes.

Committed to trunk as r225204.

gcc/jit/ChangeLog:
* jit-recording.c
(gcc::jit::recording::context::dump_reproducer_to_file):
Add pragma to generated reproducers to disable -Wunused-variable.
Fix handling of NULL string options.
---
 gcc/jit/jit-recording.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/jit/jit-recording.c b/gcc/jit/jit-recording.c
index f379b58..11b9739 100644
--- a/gcc/jit/jit-recording.c
+++ b/gcc/jit/jit-recording.c
@@ -1494,6 +1494,7 @@ recording::context::dump_reproducer_to_file (const char 
*path)
   print_version (r.get_file (),   , false);
   r.write (*/\n);
   r.write (#include libgccjit.h\n\n);
+  r.write (#pragma GCC diagnostic ignored \-Wunused-variable\\n\n);
   r.write (static void\nset_options ();
   r.write_params (contexts);
   r.write ();\n\n);
@@ -1564,12 +1565,17 @@ recording::context::dump_reproducer_to_file (const char 
*path)
 
   r.write (  /* String options.  */\n);
   for (int opt_idx = 0; opt_idx  GCC_JIT_NUM_STR_OPTIONS; opt_idx++)
-   r.write (  gcc_jit_context_set_str_option (%s,\n
-  %s,\n
-  \%s\);\n,
-r.get_identifier (contexts[ctxt_idx]),
-str_option_reproducer_strings[opt_idx],
-m_str_options[opt_idx] ? m_str_options[opt_idx] : NULL);
+   {
+ r.write (  gcc_jit_context_set_str_option (%s,\n
+%s,\n,
+  r.get_identifier (contexts[ctxt_idx]),
+  str_option_reproducer_strings[opt_idx]);
+ if (m_str_options[opt_idx])
+   r.write (  \%s\);\n,
+m_str_options[opt_idx]);
+ else
+   r.write (  NULL);\n);
+   }
   r.write (  /* Int options.  */\n);
   for (int opt_idx = 0; opt_idx  GCC_JIT_NUM_INT_OPTIONS; opt_idx++)
r.write (  gcc_jit_context_set_int_option (%s,\n
-- 
1.8.5.3



[PATCH, committed] PR jit/66628: add gcc_jit_context_add_command_line_option

2015-06-30 Thread David Malcolm
This patch adds a new entrypoint to libgccjit:
  gcc_jit_context_add_command_line_option
allowing client code to add arbitrary command-line options without
needing to recompile libgccjit.so

This is the first new entrypoint since the initial release of
libgccjit, so the patch also adds symbol-versioning to libgccjit.so,
putting all existing symbols within LIBGCCJIT_ABI_0, adding the new
symbol within LIBGCCJIT_ABI_1.  It adds a discussion of API/ABI
stability to the documentation.

Tested with manually running old client binaries against a new
libgccjit.so, and via make check-jit; jit.sum goes from 8014 to
8039 passes.

Committed to trunk as r225205.

gcc/jit/ChangeLog:
PR jit/66628
* docs/cp/topics/contexts.rst (Additional command-line options):
New section.
* docs/topics/compatibility.rst: New file.
* docs/topics/contexts.rst (Additional command-line options): New
section.
* docs/topics/index.rst: Add compatibility.rst.
* docs/_build/texinfo/libgccjit.texi: Regenerate.
* jit-playback.c (make_fake_args): Add call to
append_command_line_options.
* jit-recording.c: Within namespace gcc::jit...
(recording::context::~context): Free the optnames within
m_command_line_options.
(recording::context::set_bool_option): Likewise.
(recording::context::add_command_line_option): New method.
(recording::context::append_command_line_options): New method.
(recording::context::dump_reproducer_to_file): Add command-line
options.
* jit-recording.h: Within namespace gcc::jit...
(recording::context::add_command_line_option): New method.
(recording::context::append_command_line_options): New method.
(recording::context::m_command_line_options): New field.
* libgccjit++.h (gccjit::context::add_command_line_option): New
method.
* libgccjit.c (gcc_jit_context_add_command_line_option): New API
entrypoint.
* libgccjit.h (gcc_jit_context_add_command_line_option): New API
entrypoint.
(LIBGCCJIT_HAVE_gcc_jit_context_add_command_line_option): New
macro.
* libgccjit.map: Put existing symbols within LIBGCCJIT_ABI_0; add
LIBGCCJIT_ABI_1 and gcc_jit_context_add_command_line_option.

gcc/testsuite/ChangeLog:
PR jit/66628
* jit.dg/all-non-failing-tests.h: Add note about
test-extra-options.c.
* jit.dg/test-extra-options.c: New testcase.
---
 gcc/jit/docs/cp/topics/contexts.rst  |  19 
 gcc/jit/docs/topics/compatibility.rst|  90 ++
 gcc/jit/docs/topics/contexts.rst |  36 +++
 gcc/jit/docs/topics/index.rst|   1 +
 gcc/jit/jit-playback.c   |   4 +
 gcc/jit/jit-recording.c  |  42 +
 gcc/jit/jit-recording.h  |   7 ++
 gcc/jit/libgccjit++.h|   8 ++
 gcc/jit/libgccjit.c  |  19 
 gcc/jit/libgccjit.h  |  23 +
 gcc/jit/libgccjit.map|  11 ++-
 gcc/testsuite/jit.dg/all-non-failing-tests.h |   3 +
 gcc/testsuite/jit.dg/test-extra-options.c| 136 +++
 13 files changed, 398 insertions(+), 1 deletion(-)
 create mode 100644 gcc/jit/docs/topics/compatibility.rst
 create mode 100644 gcc/testsuite/jit.dg/test-extra-options.c

diff --git a/gcc/jit/docs/cp/topics/contexts.rst 
b/gcc/jit/docs/cp/topics/contexts.rst
index 12a4e50..b26a29d 100644
--- a/gcc/jit/docs/cp/topics/contexts.rst
+++ b/gcc/jit/docs/cp/topics/contexts.rst
@@ -196,3 +196,22 @@ Integer options
   This is a thin wrapper around the C API
   :c:func:`gcc_jit_context_set_int_option`; the options have the same
   meaning.
+
+Additional command-line options
+***
+
+.. function:: void \
+  gccjit::context::add_command_line_option (const char *optname)
+
+   Add an arbitrary gcc command-line option to the context for use
+   when compiling.
+
+   This is a thin wrapper around the C API
+   :c:func:`gcc_jit_context_add_command_line_option`.
+
+   This entrypoint was added in :ref:`LIBGCCJIT_ABI_1`; you can test for
+   its presence using
+
+   .. code-block:: c
+
+  #ifdef LIBGCCJIT_HAVE_gcc_jit_context_add_command_line_option
diff --git a/gcc/jit/docs/topics/compatibility.rst 
b/gcc/jit/docs/topics/compatibility.rst
new file mode 100644
index 000..dff1d0b
--- /dev/null
+++ b/gcc/jit/docs/topics/compatibility.rst
@@ -0,0 +1,90 @@
+.. Copyright (C) 2015 Free Software Foundation, Inc.
+   Originally contributed by David Malcolm dmalc...@redhat.com
+
+   This is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is 

[PATCH, committed] PR jit/66546: Add gcc_jit_context_set_bool_allow_unreachable_blocks

2015-06-30 Thread David Malcolm
Normally libgccjit verifies that all blocks within a function are
reachable by some path from the entry block, issuing a hard error
if it encounters an unreachable block, which is useful for the common
case.

However, some bytecode interpreters can generate unreachable bytecodes,
so it's useful when JIT-compiling them to have a way to turn off this
verification.

This patch provides a way to turn off this check, by adding a new
boolean option.

However, adding it to enum gcc_jit_bool_option is unsatisfactory,
since there would be no way to statically identify client binaries
using the new option.

Hence this patch adds a new entrypoint for the option:
  gcc_jit_context_set_bool_allow_unreachable_blocks
within a new ABI tag, LIBGCCJIT_ABI_2.

It adds a new inner_bool_option enum internally, for such new
options, which is not exposed as part of the public API.

Tested via make check-jit; jit.sum goes from 8039 to 8054 passes.

Committed to trunk as r225206.

gcc/jit/ChangeLog:
PR jit/66546
* docs/cp/topics/contexts.rst
(gccjit::context::set_bool_allow_unreachable_blocks): New.
* docs/topics/compatibility.rst (LIBGCCJIT_ABI_2): New.
* docs/topics/contexts.rst (Options): Add notes discussing the
transition from enums to entrypoints for new options.
(gcc_jit_context_set_bool_allow_unreachable_blocks): New.
* docs/_build/texinfo/libgccjit.texi: Regenerate.
* jit-common.h (gcc::jit::inner_bool_option): New enum.
* jit-recording.c: Within namespace gcc::jit...
(recording::context::context): Handle m_inner_bool_options.
(recording::context::set_inner_bool_option): New.
(inner_bool_option_reproducer_strings): New.
(recording::context::log_all_options): Log the inner bool
options.
(recording::context::log_inner_bool_option): New.
(recording::context::dump_reproducer_to_file): Write initializers
for inner bool options.
(recording::function::validate): Don't check for block
reachability if INNER_BOOL_OPTION_ALLOW_UNREACHABLE_BLOCKS is set.
* jit-recording.h: Within namespace gcc::jit...
(recording::context::set_inner_bool_option): New.
(recording::context::get_inner_bool_option): New.
(recording::context::log_inner_bool_option): New.
(recording::context::m_inner_bool_options): New.
* libgccjit++.h
(gccjit::context::set_bool_allow_unreachable_blocks): New.
* libgccjit.c
(gcc_jit_context_set_bool_allow_unreachable_blocks): New.
* libgccjit.h: Add note about options present in the
initial release of libgccjit.
(gcc_jit_context_set_bool_allow_unreachable_blocks): New API
entrypoint.
(LIBGCCJIT_HAVE_gcc_jit_context_set_bool_allow_unreachable_blocks):
New macro.
* libgccjit.map (LIBGCCJIT_ABI_2): New, containing...
(gcc_jit_context_set_bool_allow_unreachable_blocks): ...this new
entrypoint.

gcc/testsuite/ChangeLog:
PR jit/66546
* jit.dg/all-non-failing-tests.h: Add note about
test-validly-unreachable-block.c.
* jit.dg/test-validly-unreachable-block.c: New file.
---
 gcc/jit/docs/cp/topics/contexts.rst| 17 
 gcc/jit/docs/topics/compatibility.rst  |  7 +++
 gcc/jit/docs/topics/contexts.rst   | 29 +++-
 gcc/jit/jit-common.h   | 10 +
 gcc/jit/jit-recording.c| 42 +-
 gcc/jit/jit-recording.h| 12 +
 gcc/jit/libgccjit++.h  |  8 
 gcc/jit/libgccjit.c| 17 
 gcc/jit/libgccjit.h| 28 
 gcc/jit/libgccjit.map  |  6 +++
 gcc/testsuite/jit.dg/all-non-failing-tests.h   |  4 ++
 .../jit.dg/test-validly-unreachable-block.c| 51 ++
 12 files changed, 228 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/jit.dg/test-validly-unreachable-block.c

diff --git a/gcc/jit/docs/cp/topics/contexts.rst 
b/gcc/jit/docs/cp/topics/contexts.rst
index b26a29d..162e4ae 100644
--- a/gcc/jit/docs/cp/topics/contexts.rst
+++ b/gcc/jit/docs/cp/topics/contexts.rst
@@ -184,6 +184,23 @@ Boolean options
   :c:func:`gcc_jit_context_set_bool_option`; the options have the same
   meaning.
 
+.. function:: void \
+  gccjit::context::set_bool_allow_unreachable_blocks (int 
bool_value)
+
+   By default, libgccjit will issue an error about unreachable blocks
+   within a function.
+
+   This entrypoint can be used to disable that error; it is a thin wrapper
+   around the C API
+   :c:func:`gcc_jit_context_set_bool_allow_unreachable_blocks`.
+
+   This entrypoint was added in :ref:`LIBGCCJIT_ABI_2`; you can test for
+   its presence using
+
+   .. code-block:: c
+
+  #ifdef 

[PATCH, committed] jit: add switch statements

2015-06-30 Thread David Malcolm
This is a revised, combined version of the patches posted here:
  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01858.html
([PATCH 1/2] Add gcc/typed-splay-tree.h)

  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01859.html
([PATCH 2/2] jit: add switch statements)

with the fix to patch 1/2 from:
  https://gcc.gnu.org/ml/gcc-patches/2015-06/msg02103.html

The patch extends the C and C++ APIs (with a new symbol tag
LIBGCCJIT_ABI_3), adding documentation and testcases.  I also
updated jit-playback.c's add_case to set DECL_CONTEXT on the
artificial label, preventing an error in verify_gimple_label
when configured with checking enabled.

Tested with make check-jit; jit.sum goes from 8054 to 8234 passes.

Committed to trunk as r225207.

gcc/ChangeLog:
* typed-splay-tree.h: New file.

gcc/jit/ChangeLog:
* docs/cp/topics/functions.rst (Blocks): Add switch statements to
list of ways to terminate a block.
(gccjit::block::end_with_switch): Add function description.
(gccjit::case_): Add class.
(gccjit::context::new_case): Add function description.
* docs/cp/topics/objects.rst: Add case_ to class hierarchy.
* docs/topics/compatibility.rst (LIBGCCJIT_ABI_3): New.
* docs/topics/functions.rst (Blocks): Add switch statements to
list of ways to terminate a block.
(gcc_jit_block_end_with_switch): Add function description.
(gcc_jit_case): Add type.
(gcc_jit_context_new_case): Add function description.
(gcc_jit_case_as_object): Add function description.
* docs/topics/objects.rst: Add gcc_jit_case to class hierarchy.
* docs/_build/texinfo/libgccjit.texi: Regenerate.
* jit-common.h (gcc::jit::recording::case_): Add forward decl.
(gcc::jit::playback::case_): Add forward decl.
* jit-playback.c (add_case): New function.
(gcc::jit::playback::block::add_switch): New function.
* jit-playback.h (gcc::jit::playback::case_): New struct.
(gcc::jit::playback::block::get_function): New method.
(gcc::jit::playback::block::add_switch): New method.
* jit-recording.c: Within namespace gcc::jit...
(recording::context::new_case): New method.
(recording::function::validate): Update for change to
get_successor_blocks.
(recording::block::end_with_switch): New method.
(recording::block::get_successor_blocks): Update to support an
arbitrary number of successor blocks.
(recording::block::dump_edges_to_dot): Likewise.
(memento_of_new_rvalue_from_const int::get_wide_int): New.
(memento_of_new_rvalue_from_const long::get_wide_int): New.
(memento_of_new_rvalue_from_const double::get_wide_int): New.
(memento_of_new_rvalue_from_const void *::get_wide_int): New.
(recording::statement::get_successor_blocks): Update to support an
arbitrary number of successor blocks.
(recording::conditional::get_successor_blocks): Likewise.
(recording::jump::get_successor_blocks): Likewise.
(recording::return_::get_successor_blocks): Likewise.
(recording::case_::write_reproducer): New.
(recording::case_::make_debug_string): New.
(recording::switch_::switch_): New.
(recording::switch_::replay_into): New.
(recording::switch_::get_successor_blocks): New.
(recording::switch_::make_debug_string): New.
(recording::switch_::write_reproducer): New.
* jit-recording.h: Within namespace gcc::jit::recording...
(context::new_case): New.
(rvalue::is_constant): New.
(rvalue::get_wide_int): New.
(block::end_with_switch): New.
(block::get_successor_blocks): Update to support an arbitrary
number of successor blocks.
(memento_of_new_rvalue_from_const::is_constant): New.
(memento_of_new_rvalue_from_const::get_wide_int): New.
(statement::get_successor_blocks): Update to support an arbitrary
number of successor blocks.
(conditional::get_successor_blocks): Likewise.
(jump::get_successor_blocks): Likewise.
(return_::get_successor_blocks): Likewise.
(case_): New subclass of memento.
(switch_): New subclass of statement.
* libgccjit++.h (gccjit::case_): New subclass of gccjit::object.
(gccjit::context::new_case): New method.
(gccjit::block::end_with_switch): New method.
(gccjit::case_::case): New ctors.
(gccjit::case_::get_inner_case): New method.
* libgccjit.c: Include typed-splay-tree.h
(struct gcc_jit_case): New.
(gcc_jit_context_new_case): New function.
(gcc_jit_case_as_object): New function.
(valid_dest_for_switch): New function.
(valid_case_for_switch): New function.
(class api_call_validator): New class.
(class case_range_validator): New class.

Re: [PATCH] Fix PR c++/66686 (dependent template template substitution)

2015-06-30 Thread Patrick Palka
On Sat, Jun 27, 2015 at 2:37 PM, Patrick Palka patr...@parcs.ath.cx wrote:
 This patch makes coerce_template_template_parm consider a coercion
 successful if the result of calling tsubst on a template template parm
 is a dependent type even when the desired argument type is non-dependent.

 For the test case below, TREE_TYPE (parm) is B and TREE_TYPE (arg) is
 int.  After calling tsubst on the parm the resulting type is still B, a
 dependent type.  Without checking that the resulting type is dependent,
 the function would return 0 because same_type_p would return false.

 Bootstrap + regtest of this change was successful on
 x86_64-unknown-linux-gnu.  Does this patch look OK?

 gcc/cp/ChangeLog:

 * pt.c (coerce_template_template_parm) [PARM_DECL]: Don't
 return 0 if tsubst returns a dependent type.

 gcc/testsuite/ChangeLog:

 * g++.dg/template/pr66686.C: New test.
 ---
  gcc/cp/pt.c | 12 +++-
  gcc/testsuite/g++.dg/template/pr66686.C | 15 +++
  2 files changed, 22 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/pr66686.C

 diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
 index 2b37460..d7768a8 100644
 --- a/gcc/cp/pt.c
 +++ b/gcc/cp/pt.c
 @@ -6357,11 +6357,13 @@ coerce_template_template_parm (tree parm,
Dint, C d;

  i.e. the parameter list of TT depends on earlier parameters.  */
 -  if (!uses_template_parms (TREE_TYPE (arg))
 -  && !same_type_p
 -   (tsubst (TREE_TYPE (parm), outer_args, complain, in_decl),
 -TREE_TYPE (arg)))
 -   return 0;
 +  if (!uses_template_parms (TREE_TYPE (arg)))
 +   {
 + tree t = tsubst (TREE_TYPE (parm), outer_args, complain, in_decl);
 + if (!uses_template_parms (t)
 +  && !same_type_p (t, TREE_TYPE (arg)))
 +   return 0;
 +   }

if (TEMPLATE_PARM_PARAMETER_PACK (DECL_INITIAL (arg))
!TEMPLATE_PARM_PARAMETER_PACK (DECL_INITIAL (parm)))
 diff --git a/gcc/testsuite/g++.dg/template/pr66686.C 
 b/gcc/testsuite/g++.dg/template/pr66686.C
 new file mode 100644
 index 000..d8aea62
 --- /dev/null
 +++ b/gcc/testsuite/g++.dg/template/pr66686.C
 @@ -0,0 +1,15 @@
 +// PR c++/66686
 +
 +template <int>
 +struct Y { };
 +
 +template <class B, template <template <B> class Z> class C>
 +struct X
 +{
 +  C<Y> a;  // { dg-bogus "mismatch" }
 +};
 +
 +template <template <int> class>
 +struct A { };
 +
 +X<int, A> a;
 --
 2.5.0.rc0.5.g91e10c5.dirty


Early ping because I forgot to CC Jason.


[committed] Use target-insns.def for memory fences & barriers

2015-06-30 Thread Richard Sandiford
Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
Also tested via config-list.mk.  Committed as preapproved.

Thanks,
Richard


gcc/
* defaults.h (HAVE_mem_thread_fence, gen_mem_thread_fence)
(HAVE_memory_barrier, gen_memory_barrier, HAVE_mem_signal_fence)
(gen_mem_signal_fence): Delete.
* target-insns.def (mem_signal_fence, mem_thread_fence)
(memory_barrier): New targetm instruction patterns.
* optabs.c (expand_mem_thread_fence): Use them instead of HAVE_*/gen_*
interface.
(expand_mem_signal_fence): Likewise.

Index: gcc/defaults.h
===
--- gcc/defaults.h  2015-06-30 21:55:27.731812511 +0100
+++ gcc/defaults.h  2015-06-30 21:55:27.723812601 +0100
@@ -1426,36 +1426,6 @@ #define STACK_CHECK_MAX_VAR_SIZE (STACK_
 #define TARGET_VTABLE_USES_DESCRIPTORS 0
 #endif
 
-#ifndef HAVE_mem_thread_fence
-#define HAVE_mem_thread_fence 0
-static inline rtx
-gen_mem_thread_fence (rtx)
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
-#ifndef HAVE_memory_barrier
-#define HAVE_memory_barrier 0
-static inline rtx
-gen_memory_barrier ()
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
-#ifndef HAVE_mem_signal_fence
-#define HAVE_mem_signal_fence 0
-static inline rtx
-gen_mem_signal_fence (rtx)
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
 #ifndef HAVE_load_multiple
 #define HAVE_load_multiple 0
 static inline rtx
Index: gcc/target-insns.def
===
--- gcc/target-insns.def2015-06-30 21:55:27.731812511 +0100
+++ gcc/target-insns.def2015-06-30 21:55:27.723812601 +0100
@@ -32,6 +32,9 @@
Instructions should be documented in md.texi rather than here.  */
 DEF_TARGET_INSN (canonicalize_funcptr_for_compare, (rtx x0, rtx x1))
 DEF_TARGET_INSN (epilogue, (void))
+DEF_TARGET_INSN (mem_signal_fence, (rtx x0))
+DEF_TARGET_INSN (mem_thread_fence, (rtx x0))
+DEF_TARGET_INSN (memory_barrier, (void))
 DEF_TARGET_INSN (prologue, (void))
 DEF_TARGET_INSN (return, (void))
 DEF_TARGET_INSN (sibcall_epilogue, (void))
Index: gcc/optabs.c
===
--- gcc/optabs.c2015-06-30 21:55:27.731812511 +0100
+++ gcc/optabs.c2015-06-30 21:55:27.723812601 +0100
@@ -7575,12 +7575,12 @@ expand_asm_memory_barrier (void)
 void
 expand_mem_thread_fence (enum memmodel model)
 {
-  if (HAVE_mem_thread_fence)
-emit_insn (gen_mem_thread_fence (GEN_INT (model)));
+  if (targetm.have_mem_thread_fence ())
+emit_insn (targetm.gen_mem_thread_fence (GEN_INT (model)));
   else if (!is_mm_relaxed (model))
 {
-  if (HAVE_memory_barrier)
-   emit_insn (gen_memory_barrier ());
+  if (targetm.have_memory_barrier ())
+   emit_insn (targetm.gen_memory_barrier ());
   else if (synchronize_libfunc != NULL_RTX)
emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode, 0);
   else
@@ -7594,8 +7594,8 @@ expand_mem_thread_fence (enum memmodel m
 void
 expand_mem_signal_fence (enum memmodel model)
 {
-  if (HAVE_mem_signal_fence)
-emit_insn (gen_mem_signal_fence (GEN_INT (model)));
+  if (targetm.have_mem_signal_fence ())
+emit_insn (targetm.gen_mem_signal_fence (GEN_INT (model)));
   else if (!is_mm_relaxed (model))
 {
   /* By default targets are coherent between a thread and the signal



[committed] Use target-insns.def for {load,store}_multiple

2015-06-30 Thread Richard Sandiford
Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
Also tested via config-list.mk.  Committed as preapproved.

Thanks,
Richard


gcc/
* defaults.h (HAVE_load_multiple, gen_load_multiple)
(HAVE_store_multiple, gen_store_multiple): Delete.
* target-insns.def (load_multiple, store_multiple): New targetm
instruction patterns.
* expr.c (move_block_to_reg, move_block_from_reg): Use them instead
of HAVE_*/gen_* interface.

Index: gcc/defaults.h
===
--- gcc/defaults.h  2015-06-30 21:56:16.107263462 +0100
+++ gcc/defaults.h  2015-06-30 21:56:16.083263735 +0100
@@ -1426,26 +1426,6 @@ #define STACK_CHECK_MAX_VAR_SIZE (STACK_
 #define TARGET_VTABLE_USES_DESCRIPTORS 0
 #endif
 
-#ifndef HAVE_load_multiple
-#define HAVE_load_multiple 0
-static inline rtx
-gen_load_multiple (rtx, rtx, rtx)
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
-#ifndef HAVE_store_multiple
-#define HAVE_store_multiple 0
-static inline rtx
-gen_store_multiple (rtx, rtx, rtx)
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
 #ifndef HAVE_tablejump
 #define HAVE_tablejump 0
 static inline rtx
Index: gcc/target-insns.def
===
--- gcc/target-insns.def2015-06-30 21:56:16.107263462 +0100
+++ gcc/target-insns.def2015-06-30 21:56:16.083263735 +0100
@@ -32,6 +32,7 @@
Instructions should be documented in md.texi rather than here.  */
 DEF_TARGET_INSN (canonicalize_funcptr_for_compare, (rtx x0, rtx x1))
 DEF_TARGET_INSN (epilogue, (void))
+DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (mem_signal_fence, (rtx x0))
 DEF_TARGET_INSN (mem_thread_fence, (rtx x0))
 DEF_TARGET_INSN (memory_barrier, (void))
@@ -39,3 +40,4 @@ DEF_TARGET_INSN (prologue, (void))
 DEF_TARGET_INSN (return, (void))
 DEF_TARGET_INSN (sibcall_epilogue, (void))
 DEF_TARGET_INSN (simple_return, (void))
+DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
Index: gcc/expr.c
===
--- gcc/expr.c  2015-06-30 21:56:16.107263462 +0100
+++ gcc/expr.c  2015-06-30 21:56:16.083263735 +0100
@@ -1491,10 +1491,6 @@ emit_block_move_via_loop (rtx x, rtx y,
 void
 move_block_to_reg (int regno, rtx x, int nregs, machine_mode mode)
 {
-  int i;
-  rtx pat;
-  rtx_insn *last;
-
   if (nregs == 0)
 return;
 
@@ -1502,12 +1498,12 @@ move_block_to_reg (int regno, rtx x, int
 x = validize_mem (force_const_mem (mode, x));
 
   /* See if the machine can do this with a load multiple insn.  */
-  if (HAVE_load_multiple)
+  if (targetm.have_load_multiple ())
 {
-  last = get_last_insn ();
-  pat = gen_load_multiple (gen_rtx_REG (word_mode, regno), x,
-  GEN_INT (nregs));
-  if (pat)
+  rtx_insn *last = get_last_insn ();
+  rtx first = gen_rtx_REG (word_mode, regno);
+  if (rtx_insn *pat = targetm.gen_load_multiple (first, x,
+GEN_INT (nregs)))
{
  emit_insn (pat);
  return;
@@ -1516,7 +1512,7 @@ move_block_to_reg (int regno, rtx x, int
delete_insns_since (last);
 }
 
-  for (i = 0; i < nregs; i++)
+  for (int i = 0; i < nregs; i++)
 emit_move_insn (gen_rtx_REG (word_mode, regno + i),
operand_subword_force (x, i, mode));
 }
@@ -1527,18 +1523,16 @@ move_block_to_reg (int regno, rtx x, int
 void
 move_block_from_reg (int regno, rtx x, int nregs)
 {
-  int i;
-
   if (nregs == 0)
 return;
 
   /* See if the machine can do this with a store multiple insn.  */
-  if (HAVE_store_multiple)
+  if (targetm.have_store_multiple ())
 {
   rtx_insn *last = get_last_insn ();
-  rtx pat = gen_store_multiple (x, gen_rtx_REG (word_mode, regno),
-   GEN_INT (nregs));
-  if (pat)
+  rtx first = gen_rtx_REG (word_mode, regno);
+  if (rtx_insn *pat = targetm.gen_store_multiple (x, first,
+ GEN_INT (nregs)))
{
  emit_insn (pat);
  return;
@@ -1547,7 +1541,7 @@ move_block_from_reg (int regno, rtx x, i
delete_insns_since (last);
 }
 
-  for (i = 0; i < nregs; i++)
+  for (int i = 0; i < nregs; i++)
 {
   rtx tem = operand_subword (x, i, 1, BLKmode);
 



Re: [patch] fix regrename pass to ensure renamings produce valid insns

2015-06-30 Thread Sandra Loosemore

On 06/30/2015 03:06 AM, Eric Botcazou wrote:

I notice the way gcc_assert() is defined in system.h now, the test won't
disappear even when runtime checks are disabled, though you might still
adjust it to avoid any programmer confusion.


It will disappear at run time, see the definition:

/* Include EXPR, so that unused variable warnings do not occur.  */
#define gcc_assert(EXPR) ((void)(0 && (EXPR)))

so you really need to use a separate variable.


Oh, yuck -- it never even occurred to me that gcc_assert could be 
disabled.  I'll bet there are other bugs in GCC due to this very same 
problem of depending on its argument being executed for side-effect. 
(E.g. take a look at add_stmt_to_eh_lp_fn in tree-eh.c.)  Seems like 
lousy design to me especially since proper usage doesn't seem to be 
documented anywhere.


Anyway, I think the attached patch is what's required to fix the 
instance that's my fault.  OK?  Bernd, if this needs testing, can you help?


-Sandra
2015-06-30  Sandra Loosemore san...@codesourcery.com

	gcc/
	* config/c6x/c6x.c (try_rename_operands): Do not depend on
	gcc_assert evaluating its argument for side-effect.
Index: gcc/config/c6x/c6x.c
===
--- gcc/config/c6x/c6x.c	(revision 225202)
+++ gcc/config/c6x/c6x.c	(working copy)
@@ -3450,6 +3450,7 @@ try_rename_operands (rtx_insn *head, rtx
   int best_reg, old_reg;
   vecdu_head_p involved_chains = vNULL;
   unit_req_table new_reqs;
+  bool ok;
 
   for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
 {
@@ -3516,7 +3517,8 @@ try_rename_operands (rtx_insn *head, rtx
   best_reg =
 find_rename_reg (this_head, super_class, unavailable, old_reg, true);
 
-  gcc_assert (regrename_do_replace (this_head, best_reg));
+  ok = regrename_do_replace (this_head, best_reg);
+  gcc_assert (ok);
 
   count_unit_reqs (new_reqs, head, PREV_INSN (tail));
   merge_unit_reqs (new_reqs);
@@ -3529,7 +3531,10 @@ try_rename_operands (rtx_insn *head, rtx
 	   unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
 }
   if (unit_req_imbalance (new_reqs)  unit_req_imbalance (reqs))
-gcc_assert (regrename_do_replace (this_head, old_reg));
+{
+  ok = regrename_do_replace (this_head, old_reg);
+  gcc_assert (ok);
+}
   else
 memcpy (reqs, new_reqs, sizeof (unit_req_table));
 


Re: [patch] Fix ICE during SCC hashing in LTO mode

2015-06-30 Thread Eric Botcazou
 Ok (I suppose this also affects the GCC 5 branch?)

Thanks.  Yes, it does, but I'm not sure if we need to backport it immediately.

-- 
Eric Botcazou


[committed] Use target-insns.def for prologue & epilogue insns

2015-06-30 Thread Richard Sandiford
Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
Also tested via config-list.mk.  Committed as preapproved.

Thanks,
Richard


gcc/
* defaults.h (HAVE_epilogue, gen_epilogue): Delete.
* target-insns.def (epilogue, prologue, sibcall_epilogue): New
targetm instruction patterns.
* alias.c (init_alias_analysis): Use them instead of HAVE_*/gen_*
interface.
* calls.c (expand_call): Likewise.
* cfgrtl.c (cfg_layout_finalize): Likewise.
* df-scan.c (df_get_entry_block_def_set): Likewise.
(df_get_exit_block_use_set): Likewise.
* dwarf2cfi.c (pass_dwarf2_frame::gate): Likewise.
* final.c (final_start_function): Likewise.
* function.c (thread_prologue_and_epilogue_insns): Likewise.
(reposition_prologue_and_epilogue_notes): Likewise.
* reorg.c (find_end_label): Likewise.
* toplev.c (process_options): Likewise.

Index: gcc/defaults.h
===
--- gcc/defaults.h  2015-06-30 21:54:23.984536147 +0100
+++ gcc/defaults.h  2015-06-30 21:54:23.972536284 +0100
@@ -1426,16 +1426,6 @@ #define STACK_CHECK_MAX_VAR_SIZE (STACK_
 #define TARGET_VTABLE_USES_DESCRIPTORS 0
 #endif
 
-#ifndef HAVE_epilogue
-#define HAVE_epilogue 0
-static inline rtx
-gen_epilogue ()
-{
-  gcc_unreachable ();
-  return NULL;
-}
-#endif
-
 #ifndef HAVE_mem_thread_fence
 #define HAVE_mem_thread_fence 0
 static inline rtx
Index: gcc/target-insns.def
===
--- gcc/target-insns.def2015-06-30 21:54:23.984536147 +0100
+++ gcc/target-insns.def2015-06-30 21:54:23.976536238 +0100
@@ -30,6 +30,9 @@
Patterns that take no operands should have a prototype (void).
 
Instructions should be documented in md.texi rather than here.  */
+DEF_TARGET_INSN (canonicalize_funcptr_for_compare, (rtx x0, rtx x1))
+DEF_TARGET_INSN (epilogue, (void))
+DEF_TARGET_INSN (prologue, (void))
 DEF_TARGET_INSN (return, (void))
+DEF_TARGET_INSN (sibcall_epilogue, (void))
 DEF_TARGET_INSN (simple_return, (void))
-DEF_TARGET_INSN (canonicalize_funcptr_for_compare, (rtx x0, rtx x1))
Index: gcc/alias.c
===
--- gcc/alias.c 2015-06-30 21:54:23.984536147 +0100
+++ gcc/alias.c 2015-06-30 21:54:23.972536284 +0100
@@ -3038,6 +3038,14 @@ init_alias_analysis (void)
   rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
   rpo_cnt = pre_and_rev_post_order_compute (NULL, rpo, false);
 
+  /* The prologue/epilogue insns are not threaded onto the
+ insn chain until after reload has completed.  Thus,
+ there is no sense wasting time checking if INSN is in
+ the prologue/epilogue until after reload has completed.  */
+  bool could_be_prologue_epilogue = ((targetm.have_prologue ()
+ || targetm.have_epilogue ())
+ && reload_completed);
+
   pass = 0;
   do
 {
@@ -3076,17 +3084,7 @@ init_alias_analysis (void)
{
  rtx note, set;
 
-#if defined (HAVE_prologue)
- static const bool prologue = true;
-#else
- static const bool prologue = false;
-#endif
-
- /* The prologue/epilogue insns are not threaded onto the
-insn chain until after reload has completed.  Thus,
-there is no sense wasting time checking if INSN is in
-the prologue/epilogue until after reload has completed.  */
- if ((prologue || HAVE_epilogue) && reload_completed
+ if (could_be_prologue_epilogue
   && prologue_epilogue_contains (insn))
continue;
 
Index: gcc/calls.c
===
--- gcc/calls.c 2015-06-30 21:54:23.984536147 +0100
+++ gcc/calls.c 2015-06-30 21:54:23.972536284 +0100
@@ -2783,13 +2783,8 @@ expand_call (tree exp, rtx target, int i
 try_tail_call = 0;
 
   /*  Rest of purposes for tail call optimizations to fail.  */
-  if (
-#ifdef HAVE_sibcall_epilogue
-  !HAVE_sibcall_epilogue
-#else
-  1
-#endif
-  || !try_tail_call
+  if (!try_tail_call
+  || !targetm.have_sibcall_epilogue ()
   /* Doing sibling call optimization needs some work, since
 structure_value_addr can be allocated on the stack.
 It does not seem worth the effort since few optimizable
Index: gcc/cfgrtl.c
===
--- gcc/cfgrtl.c2015-06-30 21:54:23.984536147 +0100
+++ gcc/cfgrtl.c2015-06-30 21:54:23.972536284 +0100
@@ -4324,7 +4324,7 @@ cfg_layout_finalize (void)
 #endif
   force_one_exit_fallthru ();
   rtl_register_cfg_hooks ();
-  if (reload_completed && !HAVE_epilogue)
+  if (reload_completed && !targetm.have_epilogue ())
 fixup_fallthru_exit_predecessor ();
   

  1   2   >