date:20150702

[gomp4.1] Parsing ordered(n) loops in C/C++

2015-07-02 Thread Jakub Jelinek

Hi!

I've committed following patch to parse collapse+ordered-1 loops
if ordered(n) clause is present, and adjust ompexp, so that we actually
expand it as a collapsed loop with normal ordered-1 loops inside of it.

Example testcase (for now can be parsed and expanded only with the
ordered constructs commented out, Aldy is working on that).

void bar (int, int, int);

/* Example for collapse(2) ordered(2): per the description above, collapse +
   ordered - 1 = 3 nested loops are parsed; the i and j loops are collapsed
   and the k loop is kept as a normal loop inside them.  */
void
foo (int n, int m, int o)
{
  int i, j, k;
  #pragma omp for collapse(2) ordered(2)
  for (i = 0; i < m; i++)
    {
      for (j = 0; j < n; j++)
        for (k = 0; k < o; k++)
          {
            /* A pragma must fit on one logical line, so the three sink
               dependences are kept together here.  */
#pragma omp ordered depend(sink: i-1,j,k) depend(sink: i,j-1,k-1) depend(sink: i-1,j-1,k+1)
            bar (i, j, k);
#pragma omp ordered depend(source)
          }
    }
}

/* Example for ordered(2) without a collapse clause: two nested loops are
   associated with the construct and each iteration waits on iteration
   (i-1, j-3) before calling bar.  */
int
baz ()
{
  int i, j;
#pragma omp parallel for ordered(2)
  for (i=0; i < 100; ++i)
    for (j=0; j < 100; ++j)
      {
#pragma omp ordered depend(sink:i-1,j-3)
        bar (i, j, 0);
#pragma omp ordered depend(source)
      }
  /* The function is declared to return int; do not fall off the end.  */
  return 0;
}

2015-07-02  Jakub Jelinek  ja...@redhat.com

* omp-low.c (struct omp_for_data): Add ordered field.
(extract_omp_for_data): Handle loops with ordered(n) clause.
(expand_omp_for_ordered_loops): New function.
(expand_omp_for_generic): Call it.
c/
* c-parser.c (c_parser_omp_for_loop): Parse collapse + ordered - 1
nested loops if ordered(n) clause is present.
cp/
* parser.c (cp_parser_omp_for_loop): Parse collapse + ordered - 1
nested loops if ordered(n) clause is present.

--- gcc/omp-low.c.jj2015-07-01 12:50:49.0 +0200
+++ gcc/omp-low.c   2015-07-02 09:27:03.546405031 +0200
@@ -236,6 +236,7 @@ struct omp_for_data
   gomp_for *for_stmt;
   tree pre, iter_type;
   int collapse;
+  int ordered;
   bool have_nowait, have_ordered, simd_schedule;
   enum omp_clause_schedule_kind sched_kind;
   struct omp_for_data_loop *loops;
@@ -489,14 +490,15 @@ extract_omp_for_data (gomp_for *for_stmt
 
   fd->for_stmt = for_stmt;
   fd->pre = NULL;
-  fd->collapse = gimple_omp_for_collapse (for_stmt);
-  if (fd->collapse > 1)
+  if (gimple_omp_for_collapse (for_stmt) > 1)
 fd->loops = loops;
   else
 fd->loops = &fd->loop;
 
   fd->have_nowait = distribute || simd;
   fd->have_ordered = false;
+  fd->collapse = 1;
+  fd->ordered = 0;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
   fd->simd_schedule = false;
@@ -513,6 +515,8 @@ extract_omp_for_data (gomp_for *for_stmt
break;
   case OMP_CLAUSE_ORDERED:
fd->have_ordered = true;
+   if (OMP_CLAUSE_ORDERED_EXPR (t))
+ fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
break;
   case OMP_CLAUSE_SCHEDULE:
gcc_assert (!distribute && !taskloop);
@@ -525,6 +529,7 @@ extract_omp_for_data (gomp_for *for_stmt
fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
break;
   case OMP_CLAUSE_COLLAPSE:
+   fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
if (fd->collapse > 1)
  {
collapse_iter = OMP_CLAUSE_COLLAPSE_ITERVAR (t);
@@ -559,9 +564,10 @@ extract_omp_for_data (gomp_for *for_stmt
 ? integer_zero_node : integer_one_node;
 }
 
-  for (i = 0; i < fd->collapse; i++)
+  int cnt = fd->collapse + (fd->ordered > 0 ? fd->ordered - 1 : 0);
+  for (i = 0; i < cnt; i++)
 {
-  if (fd->collapse == 1)
+  if (i == 0 && fd->collapse == 1)
loop = &fd->loop;
   else if (loops != NULL)
loop = loops + i;
@@ -589,6 +595,8 @@ extract_omp_for_data (gomp_for *for_stmt
  == GF_OMP_FOR_KIND_CILKFOR));
  break;
case LE_EXPR:
+ if (i >= fd->collapse)
+   break;
  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
  else
@@ -598,6 +606,8 @@ extract_omp_for_data (gomp_for *for_stmt
  loop->cond_code = LT_EXPR;
  break;
case GE_EXPR:
+ if (i >= fd->collapse)
+   break;
  if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, -1);
  else
@@ -690,6 +700,9 @@ extract_omp_for_data (gomp_for *for_stmt
}
}
 
+  if (i >= fd->collapse)
+   continue;
+
   if (collapse_count && *collapse_count == NULL)
{
  t = fold_binary (loop->cond_code, boolean_type_node,
@@ -770,6 +783,8 @@ extract_omp_for_data (gomp_for *for_stmt
   fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
   fd->loop.cond_code = LT_EXPR;
 }
+  else if (loops)
+loops[0] = fd->loop;
 
   /* For OpenACC loops, force a chunk size of one, as this avoids the default
 scheduling where several subsequent iterations are being executed by the
@@ -6827,6 +6842,81 @@ extract_omp_for_update_vars (struct omp_
 }
 
 
+/* Wrap the body into fd->ordered - 1 loops that aren't collapsed.  */
+
+static basic_block
+expand_omp_for_ordered_loops (struct

[PATCH, libgomp]: Require target vect_simd_clones for testsuite/libgomp.c++/pr66702-?.C

2015-07-02 Thread Uros Bizjak

2015-07-02  Uros Bizjak  ubiz...@gmail.com

* testsuite/libgomp.c++/pr66702-1.C: Require
vect_simd_clones effective target.
* testsuite/libgomp.c++/pr66702-2.C: Ditto.

Tested on x86_64-linux-gnu (CentOS 5.11), committed to mainline SVN.

Uros.

Index: testsuite/libgomp.c++/pr66702-1.C
===
--- testsuite/libgomp.c++/pr66702-1.C   (revision 225240)
+++ testsuite/libgomp.c++/pr66702-1.C   (working copy)
@@ -1,4 +1,5 @@
// PR middle-end/66702
+// { dg-do run { target vect_simd_clones } }
// { dg-options "-O2" }
// { dg-additional-options "-msse2" { target sse2_runtime } }
// { dg-additional-options "-mavx" { target avx_runtime } }
Index: testsuite/libgomp.c++/pr66702-2.C
===
--- testsuite/libgomp.c++/pr66702-2.C   (revision 225240)
+++ testsuite/libgomp.c++/pr66702-2.C   (working copy)
@@ -1,4 +1,5 @@
// PR middle-end/66702
+// { dg-do run { target vect_simd_clones } }
// { dg-options "-O2" }
// { dg-additional-options "-msse2" { target sse2_runtime } }
// { dg-additional-options "-mavx" { target avx_runtime } }

Re: [PATCH] Discard Scops for which entry==exit

2015-07-02 Thread Tobias Grosser


On 06/30/2015 05:47 PM, Aditya K wrote:

Hi Tobias,
A test case (gcc/testsuite/gcc.dg/graphite/pr18792.c) came up when we removed 
`graphite-scop-detection.c:limit_scops'.
The test case is a scop where entry==exit,

BB5 (*#) -> BB6 (#);
BB6 -> BB5;

In this case BB2 is out of the scop. This is basically an empty (infinite) loop 
with no entr


OK, maybe mention this in the commit message.


Best,
Tobias

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Marc Glisse


On Thu, 2 Jul 2015, Jason Merrill wrote:


On 07/02/2015 12:10 AM, Jim Wilson wrote:

This is a known gmp problem, documented in PR56019 and in
 https://gcc.gnu.org/gcc-4.9/porting_to.html
near the bottom where it discusses cstddef.h changes.


This document also says that "A workaround until libraries get updated is to 
include <cstddef> or <stddef.h> before any headers from that library."


Can you try modifying the graphite* files accordingly?


See also

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65988#c1

system.h looks like the right way to include gmp.h in gcc, currently.

--
Marc Glisse

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Jim Wilson

On Wed, Jul 1, 2015 at 10:21 PM, Jason Merrill ja...@redhat.com wrote:
 This document also says that "A workaround until libraries get updated is to
 include <cstddef> or <stddef.h> before any headers from that library."
 Can you try modifying the graphite* files accordingly?

Right.  I forgot to try that.  Trying it now, I see that my build gets
past the point that it failed, so this does appear to work.  I won't
be able to finish a proper test until tomorrow, but for now this patch
seems to work.

Jim
2015-07-01  Jim Wilson  jim.wil...@linaro.org

	* graphite-blocking.c (HAVE_isl): Include stddef.h.
	* graphite-dependences.c, graphite-interchange.c,
	graphite-isl-ast-to-gimple.c, graphite-optimize-isl.c, graphite-poly.c,
	graphite-scop-detection.c, graphite-sese-to-poly.c, graphite.c:
	Likewise.

Index: graphite-blocking.c
===
--- graphite-blocking.c	(revision 225286)
+++ graphite-blocking.c	(working copy)
@@ -24,6 +24,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-dependences.c
===
--- graphite-dependences.c	(revision 225286)
+++ graphite-dependences.c	(working copy)
@@ -22,6 +22,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-interchange.c
===
--- graphite-interchange.c	(revision 225286)
+++ graphite-interchange.c	(working copy)
@@ -24,6 +24,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/aff.h>
 #include <isl/set.h>
 #include <isl/map.h>
Index: graphite-isl-ast-to-gimple.c
===
--- graphite-isl-ast-to-gimple.c	(revision 225286)
+++ graphite-isl-ast-to-gimple.c	(working copy)
@@ -21,6 +21,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-optimize-isl.c
===
--- graphite-optimize-isl.c	(revision 225286)
+++ graphite-optimize-isl.c	(working copy)
@@ -21,6 +21,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-poly.c
===
--- graphite-poly.c	(revision 225286)
+++ graphite-poly.c	(working copy)
@@ -22,6 +22,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-scop-detection.c
===
--- graphite-scop-detection.c	(revision 225286)
+++ graphite-scop-detection.c	(working copy)
@@ -22,6 +22,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite-sese-to-poly.c
===
--- graphite-sese-to-poly.c	(revision 225286)
+++ graphite-sese-to-poly.c	(working copy)
@@ -21,6 +21,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/union_map.h>
Index: graphite.c
===
--- graphite.c	(revision 225286)
+++ graphite.c	(working copy)
@@ -35,6 +35,9 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 
 #ifdef HAVE_isl
+/* Workaround for GMP 5.1.3 bug, see PR56019.  */
+#include <stddef.h>
+
 #include <isl/set.h>
 #include <isl/map.h>
 #include <isl/options.h>

Re: [PATCH] Graphite cannot handle return stmt

2015-07-02 Thread Tobias Grosser


On 06/30/2015 10:50 PM, Aditya Kumar wrote:

No regressions.

2015-06-29  Aditya Kumar  aditya...@samsung.com
 Sebastian Pop s@samsung.com

 * graphite-scop-detection.c (stmt_simple_for_scop_p): Bail out in case 
of a return statement.


LGTM.

Tobias

Re: [PATCH, lto-plugin]: Avoid warning: implicit declaration of function ‘asprintf’ when building lto-plugin.o

2015-07-02 Thread Richard Biener

On Wed, Jul 1, 2015 at 2:25 PM, Uros Bizjak ubiz...@gmail.com wrote:
 Hello!

 We have to enable OS extensions (GNU_SOURCE) to use asprintf.

 2015-07-01  Uros Bizjak  ubiz...@gmail.com

 * configure.ac: Add AC_USE_SYSTEM_EXTENSIONS.
 * configure: Regenerate.
 * config.h.in: Ditto.

 Bootstrapped on x86_64-linux-gnu.

 OK for mainline?

Ok.

Richard.

 Uros.

 Index: configure.ac
 ===
 --- configure.ac(revision 225240)
 +++ configure.ac(working copy)
 @@ -9,6 +9,7 @@
 [specify the directory where to find libiberty [../libiberty]])],
   [], with_libiberty=../libiberty)
 AC_SUBST(with_libiberty)
 +AC_USE_SYSTEM_EXTENSIONS
 AC_PROG_CC
 AC_SYS_LARGEFILE
 ACX_PROG_CC_WARNING_OPTS([-Wall], [ac_lto_plugin_warn_cflags])

Re: [PATCH, libcpp]: Use asm flag outputs in search_line_sse42 main loop

2015-07-02 Thread Ondřej Bílka

On Mon, Jun 29, 2015 at 09:07:22PM +0200, Uros Bizjak wrote:
 Hello!
 
 Attached patch introduces asm flag outputs in seach_line_sse42 main
 loop to handle carry flag value from pcmpestri insn. Slightly improved
 old code that uses asm loop compiles to:

Using sse4.2 here is a bit dubious, as pcmpistri has horrible latency, and
four checks are near the boundary where replacing it by an sse2 sequence is
faster.

So I looked closer and wrote program to count number of source file lines to 
compute.

I found that there is almost no difference between sse2, sse4.2 code or
just calling strpbrk.

But there were significant performance mistakes in sse2 code. First one
is that a comment

  /* Create a mask for the bytes that are valid within the first
 16-byte block.  The Idea here is that the AND with the mask
 within the loop is free, since we need some AND or TEST
 insn in order to set the flags for the branch anyway.  */

First claim about free is false as gcc does repeat setting mask to 1 in
each iteration instead only on first.

Then there is the problem that jumping directly into the loop here is a bad idea
due to branch misprediction. It's better to use a header when it's likely
that the loop ends in the first iteration.

A worst problem is that using aligned load and masking is unpredictable
loop, depending on alignment it could only check one byte.

A correct approach here is check if we cross page boundary and use
unaligned load. That always checks 16 bytes instead of 8 on average when
alignment is completely random.

That improved a sse2 code to be around 5% faster than sse4.2 code.

A second optimization is that most lines are less than 80 characters
long. So don't bother with loop just do checks in header. That gives
another 5%

A benchmark is bit ugly, usage is

./benchmark file function repeat
where you need supply source named file that will be scanned repeat
times. A functions tested are following:
./benchmark foo.c 1 10 # strpbrk
./benchmark foo.c 2 10 # current sse2
./benchmark foo.c 3 10 # current sse4.2
./benchmark foo.c 4 10 # improved sse2 with unaligned check of 16 bytes.
./benchmark foo.c 5 10 # improved sse2 with unaligned check of 16 bytes.


I will send patch later, do you have comments about that improvements?
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
/* Report MSG through perror and terminate the process with EXIT_FAILURE.
   The do { ... } while (0) wrapper makes the expansion a single statement,
   so the macro is safe inside an unbraced if/else.  */
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)

/* Baseline scanner built on strpbrk: return a pointer to the first '\r',
   '\n', '?' or '\\' in the NUL-terminated string X, or NULL if X contains
   none of them.  Y is not used by this implementation.  */
char *next_line(char *x, char *y)
{
  (void) y;
  /* The accept set must be a string literal; the same four characters are
     the ones replicated in repl_chars below.  */
  return strpbrk(x, "\r\n?\\");
}

#include <emmintrin.h>
#define __v16qi v16qi
#define uchar unsigned char


/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.

   Each row holds one character repeated 16 times, and the table is
   16-byte aligned.  Row order: 0 = '\n', 1 = '\r', 2 = '\\', 3 = '?'.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?' },
};

/* A version of the fast scanner using SSE2 vectorized byte compare insns.

   Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
   END is not consulted: the loop keeps loading 16-byte blocks until one of
   those characters is found, so the caller must guarantee that one is
   present.  The first load starts at S rounded down to a 16-byte boundary,
   so up to 15 bytes before S are read and then masked out.  */

static const uchar *
#ifndef __SSE2__
__attribute__((__target__("sse2")))
#endif
search_line_sse2 (const uchar *s, const uchar *end )
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));

  const v16qi repl_nl = *(const v16qi *)repl_chars[0];
  const v16qi repl_cr = *(const v16qi *)repl_chars[1];
  const v16qi repl_bs = *(const v16qi *)repl_chars[2];
  const v16qi repl_qm = *(const v16qi *)repl_chars[3];

  unsigned int misalign, found, mask;
  const v16qi *p;
  v16qi data, t;

  (void) end;

  /* Align the source pointer.  */
  misalign = (uintptr_t)s & 15;
  p = (const v16qi *)((uintptr_t)s & -16);
  data = *p;

  /* Create a mask for the bytes that are valid within the first
     16-byte block.  The idea here is that the AND with the mask
     within the loop is free, since we need some AND or TEST
     insn in order to set the flags for the branch anyway.  */
  mask = -1u << misalign;

  /* Main loop processing 16 bytes at a time.  The first block is already
     loaded, so enter the loop body past the load.  */
  goto start;
  do
    {
      data = *++p;
      mask = -1;

    start:
      t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
      t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
      t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
      t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
      found = __builtin_ia32_pmovmskb128 (t);
      /* Drop matches in bytes that precede S in the first block.  */
      found &= mask;
    }
  while (!found);

  /* FOUND contains 1 in bits for which we matched a relevant
     character.  Conversion to the byte index is trivial.  */
  found = __builtin_ctz(found);
  return (const uchar *)p + found;
}
#define OR(x,y) ((x)|(y))
static const uchar *

Fixed Regressions with [committed] Use target-insns.def for prologue epilogue insns

2015-07-02 Thread Hans-Peter Nilsson

 From: Richard Sandiford rdsandif...@googlemail.com
 Date: Wed, 1 Jul 2015 23:26:59 +0200

 Hans-Peter Nilsson hans-peter.nils...@axis.com writes:
  From: Richard Sandiford richard.sandif...@arm.com
  Date: Tue, 30 Jun 2015 22:55:24 +0200

  Bootstrapped & regression-tested on x86_64-linux-gnu and aarch64-linux-gnu.
  Also tested via config-list.mk.  Committed as preapproved.

  Thanks,
  Richard

  gcc/
  * defaults.h (HAVE_epilogue, gen_epilogue): Delete.
  * target-insns.def (epilogue, prologue, sibcall_prologue): New
  targetm instruction patterns.
  * alias.c (init_alias_analysis): Use them instead of HAVE_*/gen_*
  interface.
  * calls.c (expand_call): Likewise.
  * cfgrtl.c (cfg_layout_finalize): Likewise.
  * df-scan.c (df_get_entry_block_def_set): Likewise.
  (df_get_exit_block_use_set): Likewise.
  * dwarf2cfi.c (pass_dwarf2_frame::gate): Likewise.
  * final.c (final_start_function): Likewise.
  * function.c (thread_prologue_and_epilogue_insns): Likewise.
  (reposition_prologue_and_epilogue_notes): Likewise.
  * reorg.c (find_end_label): Likewise.
  * toplev.c (process_options): Likewise.

  I think this one -being the most fitting patch in the range
  (225190:225210]- caused this regression for cris-elf:

  Running
  /tmp/hpautotest-gcc1/gcc/gcc/testsuite/gcc.target/cris/torture/cris-torture.exp
  ...
  FAIL: gcc.target/cris/torture/no-pro-epi-1.c   -O3 -g  (internal compiler 
  error)
  FAIL: gcc.target/cris/torture/no-pro-epi-1.c   -O3 -g  (test for excess 
  errors)

  This test checks that the -mno-prologue-epilogue option works,
  whose semantics is supposedly self-explanatory.

 Well, yes and no :-)

Hm...I take that as an affirmation on the regression but perhaps
a no to some of my statements...

  The crash is coming from the code that outputs
 dwarf CFI information.  The code that records this information is skipped
 for targets without rtl prologues, with the comment:

   /* Targets which still implement the prologue in assembler text
  cannot use the generic dwarf2 unwinding.  */

 That seems accurate.  So what's -mno-prologue-epilogue supposed to do
 wrt CFI entries?  Should it output empty entries or none at all?

A big whatever on that one.  Debugging and omitting prologue
and epilogue is not something I find reason to spend time on
other than making sure there are no crashes.

 The first-order reason for the failure is that the code used to be
 conditional on #ifndef HAVE_prologue and didn't care what HAVE_prologue
 itself evaluated to.  So the condition on the pattern wasn't actually
 tested.

Not completely true: there was inconsistency between uses of
#ifdef and if (HAVE_prologue).

 Which I suppose leads to the question: does !HAVE_prologue when prologue
 is defined mean "I know how to output rtl prologues, but the prologue
 for this function is empty" or "I'll output the prologue as text rather
 than rtl".  I think it logically means the second.

Agreed.

 The condition says
 whether the pattern can be used; if the pattern can be used but happens
 to generate no code then it just outputs no instructions (which is pretty
 common for prologues in leaf functions).

 The port seems to hedge its bets here.  It has both:

 (define_expand "prologue"
   [(const_int 0)]
   "TARGET_PROLOGUE_EPILOGUE"
   "cris_expand_prologue (); DONE;")

 and:

 void
 cris_expand_prologue (void)
 {
   [...]
   /* Don't do anything if no prologues or epilogues are wanted.  */
   if (!TARGET_PROLOGUE_EPILOGUE)
 return;

Yeah, a visit to the archive supports me thinking this was an
oversight, perhaps caused by the effects of the now fixed
inconsistency.

 which I guess means that the HAVE_prologue condition wasn't being
 consistently tested.  Now that it is: is -mno-prologue-epilogue
 just supposed to generate empty prologues and epilogues, as implied
 by the cris.c code?  If so then removing the conditions on prologue
 and epilogue should work.  If not, then which of the targetm.have_prologue 
 ()
 etc. conditions do you need to be true for -mno-prologue-epilogue?

 (You have the distinction of having the only port with conditional
 prologue and epilogue patterns. :-))

Not any longer.  Also removed a stale comment.
This committed patch fixes the noted regressions, without
causing further regressions, testing cris-elf in a simulator.

gcc:
* config/cris/cris.md (epilogue): Remove condition.
(prologue): Ditto.

Index: config/cris/cris.md
===
--- config/cris/cris.md (revision 225286)
+++ config/cris/cris.md (working copy)
@@ -3518,14 +3518,12 @@ (define_insn *return_expanded

 (define_expand "prologue"
   [(const_int 0)]
-  "TARGET_PROLOGUE_EPILOGUE"
+  ""
   "cris_expand_prologue (); DONE;")

-;; Note that the (return) from the expander itself is always the last
-;; insn in the epilogue.

[PATCH] Fix PR66719

2015-07-02 Thread Richard Biener


Committed.

Richard.

2015-07-02  Richard Biener  rguent...@suse.de

PR testsuite/66719
* gcc.dg/vect/bb-slp-32.c: Re-add XFAIL for targets not supporting
unaligned loads.

Index: gcc/testsuite/gcc.dg/vect/bb-slp-32.c
===
--- gcc/testsuite/gcc.dg/vect/bb-slp-32.c   (revision 225249)
+++ gcc/testsuite/gcc.dg/vect/bb-slp-32.c   (working copy)
@@ -19,4 +19,4 @@ int foo (int *p)
   return tem0 + tem1 + tem2 + tem3;
 }
 
-/* { dg-final { scan-tree-dump "vectorization is not profitable" "slp2" } } */
+/* { dg-final { scan-tree-dump "vectorization is not profitable" "slp2" { 
xfail  { vect_no_align && { ! vect_hw_misalign } } } } } */

Re: [PATCH 2/4][PR target/65697][5.1][Aarch64] Backport stronger barriers for __sync,fetch-op builtins.

2015-07-02 Thread James Greenhalgh

On Fri, Jun 26, 2015 at 01:07:09PM +0100, Matthew Wahab wrote:
 
 This patch backports the changes made to strengthen the barriers emitted for
 the __sync fetch-and-op/op-and-fetch builtins.
 
 The trunk patch submission is at
 https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01989.html
 The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg00076.html
 
 Tested the series for aarch64-none-linux-gnu with check-gcc
 
 Ok for the branch?
 Matthew

OK.

Thanks,
James

 
 2015-06-26  Matthew Wahab  matthew.wa...@arm.com
 
   Backport from trunk.
   2015-06-01  Matthew Wahab  matthew.wa...@arm.com
 
   PR target/65697
   * config/aarch64/aarch64.c (aarch64_emit_post_barrier):New.
   (aarch64_split_atomic_op): Check for __sync memory models, emit
   appropriate initial loads and final barriers.
 

 From d6d3351b4547d0ad52e4d7e9955fafdced11491a Mon Sep 17 00:00:00 2001
 From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
 Date: Mon, 1 Jun 2015 15:18:19 +
 Subject: [PATCH 2/4] [Aarch64][5.1] Strengthen barriers for sync-fetch-op
  builtin.
 
 PR target/65697
   * config/aarch64/aarch64.c (aarch64_emit_post_barrier):New.
   (aarch64_split_atomic_op): Check for __sync memory models, emit
   appropriate initial loads and final barriers.
 
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223983 
 138bc75d-0d04-0410-961f-82ee72b054a4
 
 Conflicts:
   gcc/ChangeLog
   gcc/config/aarch64/aarch64.c
 
 Change-Id: I45600c4dd0002b4c2d48de36d695c83581fe50da
 ---
  gcc/config/aarch64/aarch64.c | 31 ++-
  1 file changed, 30 insertions(+), 1 deletion(-)
 
 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
 index b8b37b8..708fc23 100644
 --- a/gcc/config/aarch64/aarch64.c
 +++ b/gcc/config/aarch64/aarch64.c
 @@ -9066,6 +9066,23 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, bval, x));
  }
  
 +/* Emit a barrier, that is appropriate for memory model MODEL, at the end of 
 a
 +   sequence implementing an atomic operation.  */
 +
 +static void
 +aarch64_emit_post_barrier (enum memmodel model)
 +{
 +  const enum memmodel base_model = memmodel_base (model);
 +
 +  if (is_mm_sync (model)
 +   && (base_model == MEMMODEL_ACQUIRE
 +   || base_model == MEMMODEL_ACQ_REL
 +   || base_model == MEMMODEL_SEQ_CST))
 +{
 +  emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
 +}
 +}
 +
  /* Split a compare and swap pattern.  */
  
  void
 @@ -9128,6 +9145,8 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
 old_out, rtx new_out, rtx mem,
  {
machine_mode mode = GET_MODE (mem);
machine_mode wmode = (mode == DImode ? DImode : SImode);
 +  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
 +  const bool is_sync = is_mm_sync (model);
rtx_code_label *label;
rtx x;
  
 @@ -9142,7 +9161,13 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
 old_out, rtx new_out, rtx mem,
  old_out = new_out;
value = simplify_gen_subreg (wmode, value, mode, 0);
  
 -  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
 +  /* The initial load can be relaxed for a __sync operation since a final
 + barrier will be emitted to stop code hoisting.  */
 + if (is_sync)
 +aarch64_emit_load_exclusive (mode, old_out, mem,
 +  GEN_INT (MEMMODEL_RELAXED));
 +  else
 +aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
  
switch (code)
  {
 @@ -9178,6 +9203,10 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
 old_out, rtx new_out, rtx mem,
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
   gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
 +
 +  /* Emit any final barrier needed for a __sync operation.  */
 +  if (is_sync)
 +aarch64_emit_post_barrier (model);
  }
  
  static void
 -- 
 1.9.1

Re: [PATCH, PR66432] Handle PARM_DECL in remap_gimple_op_r

2015-07-02 Thread Richard Biener

On Wed, Jul 1, 2015 at 7:09 PM, Tom de Vries tom_devr...@mentor.com wrote:
 On 01/07/15 13:58, Richard Biener wrote:

 On Wed, Jul 1, 2015 at 1:43 PM, Tom de Vries tom_devr...@mentor.com
 wrote:

 Hi,

 I.

 When running test libgomp.c/appendix-a/a.29.1.c with '--target_board
 unix/-O2/-g', we run into this failure:
 ...
 FAIL: libgomp.c/appendix-a/a.29.1.c (test for excess errors)
 Excess errors:
 src/libgomp/testsuite/libgomp.c/appendix-a/a.29.1.c:6:1: error: type
 mismatch between an SSA_NAME and its symbol
 ...

 Without -g, the testcase passes.


 II.

 The scenario for the failure is as follows:

 At fnsplit, we split off f.part.0 from f, which at source level looks
 like
 this:
 ...
 void
 f (int n, int B[n][n], int C[])
 {
int D[2][2] = { 1, 2, 3, 4 };
int E[n][n];
assert (n >= 2);
E[1][1] = 4;
 #pragma omp parallel firstprivate(B, C, D, E)
{
  assert (sizeof (B) == sizeof (int (*)[n]));
  assert (sizeof (C) == sizeof (int *));
  assert (sizeof (D) == 4 * sizeof (int));
  assert (sizeof (E) == n * n * sizeof (int));
  /* Private B and C have values of original B and C. */
  assert (B[1][1] == A[1][1]);
  assert (C[3] == A[1][1]);
  assert (D[1][1] == 4);
  assert (E[1][1] == 4);
}
 }
 ...

 The split introduces a debug_insn and ssa-name that references param B in
 f:
 ...
# DEBUG D#4ptD.0 = B_3(D)
 ..

 And a debug_insn that references param B in f.part.0:
 ...
# DEBUG D#7ptD.0 s=> BD.1846
 ...

 At this point, the type of the ssa name and the param are the same.


 With the same PARM_DECL?  I think that's the bug.


 Attached patch also fixes the ICE, by copying the PARM_DECL using in the
 debug insn. Does this look ok for testing?

Hmm, it looks like it would break the purpose of this strange code.
It looks like
Jakub added this so CCing him for comments.

What should probably be done is to indeed copy the PARM_DECL for the reference
in the callee but make it have an abstract origin referring to the
PARM_DECL in the
caller?

Jakub did add guality tests - gcc.dg/guality/pr54519-?.c so you might want to
check whether that still passes after any change (and first check it still tests
what it is supposed to test, that is, ipa-split still applying and
removing a parameter)

Richard.

 Thanks,
 - Tom

Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2015-07-02 Thread Richard Earnshaw

On 30/06/15 02:15, Jim Wilson wrote:
 This is my suggested fix for PR 65932, which is a linux kernel
 miscompile with gcc-5.1.
 
 The problem here is caused by a chain of events.  The first is that
 the relatively new eipa_sra pass creates fake parameters that behave
 slightly differently than normal parameters.  The second is that the
 optimizer creates phi nodes that copy local variables to fake
 parameters and/or vice versa.  The third is that the out-of-ssa pass
 assumes that it can emit simple move instructions for these phi nodes.
 And the fourth is that the ARM port has a PROMOTE_MODE macro that
 forces QImode and HImode to unsigned, but a
 TARGET_PROMOTE_FUNCTION_MODE hook that does not.  So signed char and
 short parameters have different in register representations than local
 variables, and require a conversion when copying between them, a
 conversion that the out-of-ssa pass can't easily emit.
 
 Ultimately, I think this is a problem in the arm backend.  It should
 not have a PROMOTE_MODE macro that is changing the sign of char and
 short local variables.  I also think that we should merge the
 PROMOTE_MODE macro with the TARGET_PROMOTE_FUNCTION_MODE hook to
 prevent this from happening again.
 

The documentation for PROMOTE_MODE says:

For most machines, the macro definition does not change @var{unsignedp}.
However, some machines, have instructions that preferentially handle
either signed or unsigned quantities of certain modes.  For example, on
the DEC Alpha, 32-bit loads from memory and 32-bit add instructions
sign-extend the result to 64 bits.  On such machines, set
@var{unsignedp} according to which kind of extension is more efficient.

So it seems to me that the ARM backend is only doing what the
documentation says should work.

 I see four general problems with the current ARM PROMOTE_MODE definition.
 1) Unsigned char is only faster for armv5 and earlier, before the sxtb
 instruction was added.  It is a lose for armv6 and later.

Not quite, ARM state still has more flexible addressing modes for
unsigned byte loads than for signed byte loads.  It's even worse with
thumb1 where some signed loads have no single-register addressing mode
(ie you have to copy zero into another register to use as an index
before doing the load).


R.

 2) Unsigned short was only faster for targets that don't support
 unaligned accesses.  Support for these targets was removed a while
 ago, and this PROMOTE_MODE hunk should have been removed at the same
 time.  It was accidentally left behind.
 3) TARGET_PROMOTE_FUNCTION_MODE used to be a boolean hook, when it was
 converted to a function, the PROMOTE_MODE code was copied without the
 UNSIGNEDP changes.  Thus it is only an accident that
 TARGET_PROMOTE_FUNCTION_MODE and PROMOTE_MODE disagree.  Changing
 TARGET_PROMOTE_FUNCTION_MODE is an ABI change, so only PROMOTE_MODE
 changes to resolve the difference are safe.
 4) There is a general principle that you should only change signedness
 in PROMOTE_MODE if the hardware forces it, as otherwise this results
 in extra conversion instructions that make code slower.  The mips64
 hardware for instance requires that 32-bit values be sign-extended
 regardless of type, and instructions may trap if this is not true.
 However, it has a set of 32-bit instructions that operate on these
 values, and hence no conversions are required.  There is no similar
 case on ARM. Thus the conversions are unnecessary and unwise.  This
 can be seen in the testcases where gcc emits both a zero-extend and a
 sign-extend inside a loop, as the sign-extend is required for a
 compare, and the zero-extend is required by PROMOTE_MODE.
 
 My change was tested with an arm bootstrap, make check, and SPEC
 CPU2000 run.  The original poster verified that this gives a linux
 kernel that boots correctly.
 
 The PROMOTE_MODE change causes 3 testsuite testcases to fail.  These
 are tests to verify that smulbb and/or smlabb are generated.
 Eliminating the unnecessary sign conversions causes us to get better
 code that doesn't include the smulbb and smlabb instructions.  I had
 to modify the testcases to get them to emit the desired instructions.
 With the testcase changes there are no additional testsuite failures,
 though I'm concerned that these testcases with the changes may be
 fragile, and future changes may break them again.
 
 If there are ARM parts where smulbb/smlabb are faster than mul/mla,
 then maybe we should try to add new patterns to get the instructions
 emitted again for the unmodified testcases.
 
 Jim
 
 
 pr65932-3.patch
 
 
 gcc/
 2015-06-29  Jim Wilson  jim.wil...@linaro.org
 
   PR target/65932
   * config/arm/arm.h (PROMOTE_MODE): Don't set UNSIGNEDP for QImode and
   HImode.
 
 gcc/testsuite/
 2015-06-29  Jim Wilson  jim.wil...@linaro.org
 
   PR target/65932
   * gcc.target/arm/wmul-1.c (mac): Change a and b to int pointers.  Cast
   multiply operands to short.
   * gcc.target/arm/wmul-2.c (vec_mpy): Cast

RE: [Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-07-02 Thread David Sherwood


 On Mon, 29 Jun 2015, David Sherwood wrote:
 
  Hi,
 
  I have added new STRICT_MAX_EXPR and STRICT_MIN_EXPR expressions to support 
  the
  IEEE versions of fmin and fmax. This is done by recognising the math library
  fmax and fmin builtin functions in a similar way to how this is done for
  -ffast-math. This also allows us to vectorise the IEEE max/min functions for
  targets that support it, for example aarch64/aarch32.
 
 This patch is missing documentation.  You need to document the new insn
 patterns in md.texi and the new tree codes in generic.texi.

Hi, I've uploaded a new patch with the documentation. Hope this is ok.

Regards,
David Sherwood.

ChangeLog:

2015-07-02  David Sherwood  david.sherw...@arm.com

gcc/
* builtins.c (integer_valued_real_p): Add STRICT_MIN_EXPR and
STRICT_MAX_EXPR.
(fold_builtin_fmin_fmax): For strict math, convert built-in fmin and
fmax to STRICT_MIN_EXPR and STRICT_MAX_EXPR, respectively.
* expr.c (expand_expr_real_2): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
* fold-const.c (const_binop): Likewise.
(fold_binary_loc, tree_binary_nonnegative_warnv_p): Likewise.
(tree_binary_nonzero_warnv_p): Likewise.
* optabs.h (strict_minmax_support): Declare.
* optabs.def: Add new optabs strict_max_optab/strict_min_optab.
* optabs.c (optab_for_tree_code): Return new optabs for STRICT_MIN_EXPR
and STRICT_MAX_EXPR.
(strict_minmax_support): New function.
* real.c (real_arithmetic): Add STRICT_MIN_EXPR and STRICT_MAX_EXPR.
* tree.def: Likewise.
* tree.c (associative_tree_code, commutative_tree_code): Likewise.
* tree-cfg.c (verify_expr): Likewise.
(verify_gimple_assign_binary): Likewise.
* tree-inline.c (estimate_operator_cost): Likewise.
* tree-pretty-print.c (dump_generic_node, op_code_prio): Likewise.
(op_symbol_code): Likewise.
gcc/config:
* aarch64/aarch64.md: New pattern.
* aarch64/aarch64-simd.md: Likewise.
* aarch64/iterators.md: New unspecs, iterators.
* arm/iterators.md: New iterators.
* arm/unspecs.md: New unspecs.
* arm/neon.md: New pattern.
* arm/vfp.md: Likewise.
gcc/doc:
* generic.texi: Add STRICT_MAX_EXPR and STRICT_MIN_EXPR.
* md.texi: Add strict_min and strict_max patterns.
gcc/testsuite
* gcc.target/aarch64/maxmin_strict.c: New test.
* gcc.target/arm/maxmin_strict.c: New test.




strict_max.patch
Description: Binary data

Re: [PATCH 4/4][PR target/65697][5.1][Aarch64] Backport tests for __sync_builtins.

2015-07-02 Thread James Greenhalgh

On Fri, Jun 26, 2015 at 01:10:21PM +0100, Matthew Wahab wrote:
 This patch backports the tests added for the code generated by the Aarch64 
 backend
 for the __sync builtins.
 
 The trunk patch submission is at
 https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01992.html
 The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg00079.html
 
 Tested the series for aarch64-none-linux-gnu with check-gcc
 
 Ok for the branch?
 Matthew

OK.

Thanks,
James

 
 2015-06-26  Matthew Wahab  matthew.wa...@arm.com
 
   Backport from trunk
   2015-06-01  Matthew Wahab  matthew.wa...@arm.com
 
   PR target/65697
   * gcc.target/aarch64/sync-comp-swap.c: New.
   * gcc.target/aarch64/sync-comp-swap.x: New.
   * gcc.target/aarch64/sync-op-acquire.c: New.
   * gcc.target/aarch64/sync-op-acquire.x: New.
   * gcc.target/aarch64/sync-op-full.c: New.
   * gcc.target/aarch64/sync-op-full.x: New.
   * gcc.target/aarch64/sync-op-release.c: New.
   * gcc.target/aarch64/sync-op-release.x: New.
 

 From 704058e9acd56043c3b8549c3bbe14acf1c370e3 Mon Sep 17 00:00:00 2001
 From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
 Date: Mon, 1 Jun 2015 15:24:37 +
 Subject: [PATCH 4/4] [Aarch64][5.1] Add tests for __sync_builtins.
 
   PR target/65697
   * gcc.target/aarch64/sync-comp-swap.c: New.
   * gcc.target/aarch64/sync-comp-swap.x: New.
   * gcc.target/aarch64/sync-op-acquire.c: New.
   * gcc.target/aarch64/sync-op-acquire.x: New.
   * gcc.target/aarch64/sync-op-full.c: New.
   * gcc.target/aarch64/sync-op-full.x: New.
   * gcc.target/aarch64/sync-op-release.c: New.
   * gcc.target/aarch64/sync-op-release.x: New.
 
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223986 
 138bc75d-0d04-0410-961f-82ee72b054a4
 
 Conflicts:
   gcc/testsuite/ChangeLog
 
 Change-Id: I1cc83df41532588a7d91c5b021838392e5547e85
 ---
  gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c  |  8 +++
  gcc/testsuite/gcc.target/aarch64/sync-comp-swap.x  | 13 
  gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c |  8 +++
  gcc/testsuite/gcc.target/aarch64/sync-op-acquire.x |  7 +++
  gcc/testsuite/gcc.target/aarch64/sync-op-full.c|  8 +++
  gcc/testsuite/gcc.target/aarch64/sync-op-full.x| 73 
 ++
  gcc/testsuite/gcc.target/aarch64/sync-op-release.c |  6 ++
  gcc/testsuite/gcc.target/aarch64/sync-op-release.x |  7 +++
  8 files changed, 130 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-comp-swap.x
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-acquire.x
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-full.x
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-release.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/sync-op-release.x
 
 diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c 
 b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
 new file mode 100644
 index 000..126b997
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
 @@ -0,0 +1,8 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 -fno-ipa-icf } */
 +
 +#include sync-comp-swap.x
 +
 +/* { dg-final { scan-assembler-times ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\] 2 
 } } */
 +/* { dg-final { scan-assembler-times stlxr\tw\[0-9\]+, w\[0-9\]+, 
 \\\[x\[0-9\]+\\\] 2 } } */
 +/* { dg-final { scan-assembler-times dmb\tish 2 } } */
 diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.x 
 b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.x
 new file mode 100644
 index 000..eda52e40
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.x
 @@ -0,0 +1,13 @@
 +int v = 0;
 +
 +int
 +sync_bool_compare_swap (int a, int b)
 +{
 +  return __sync_bool_compare_and_swap (v, a, b);
 +}
 +
 +int
 +sync_val_compare_swap (int a, int b)
 +{
 +  return __sync_val_compare_and_swap (v, a, b);
 +}
 diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c 
 b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
 new file mode 100644
 index 000..2639f9f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
 @@ -0,0 +1,8 @@
 +/* { dg-do compile } */
 +/* { dg-options -O2 } */
 +
 +#include sync-op-acquire.x
 +
 +/* { dg-final { scan-assembler-times ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\] 1 
 } } */
 +/* { dg-final { scan-assembler-times stxr\tw\[0-9\]+, w\[0-9\]+, 
 \\\[x\[0-9\]+\\\] 1 } } */
 +/* { dg-final { scan-assembler-times dmb\tish 1 } } */
 diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.x 
 b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.x
 new file mode 100644
 index 000..4c4548c
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.x
 @@ -0,0 +1,7 @@
 +int v;
 +
 +int
 +sync_lock_test_and_set (int a)
 +{
 +

Re: [PATCH 3/4][PR target/65697][5.1][Aarch64] Backport stronger barriers for __sync,compare-and-swap builtins.

2015-07-02 Thread James Greenhalgh

On Fri, Jun 26, 2015 at 01:08:50PM +0100, Matthew Wahab wrote:
 This patch backports the changes made to strengthen the barriers emitted for
 the __sync compare-and-swap builtins.
 
 The trunk patch submission is at
 https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01990.html
 The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg00077.html
 
 Tested the series for aarch64-none-linux-gnu with check-gcc
 
 Ok for the branch?
 Matthew

OK.

Thanks,
James

 
 2015-06-26  Matthew Wahab  matthew.wa...@arm.com
 
   Backport from trunk.
   2015-06-01  Matthew Wahab  matthew.wa...@arm.com
 
   PR target/65697
   * config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Check
   for __sync memory models, emit initial loads and final barriers as
   appropriate.
 
 

 From 5fbfcc46e6eb2b8b61aa96c9c96da9a572bc4d12 Mon Sep 17 00:00:00 2001
 From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
 Date: Mon, 1 Jun 2015 15:21:02 +
 Subject: [PATCH 3/4] [Aarch64][5.1] Strengthen barriers for sync-compare-swap
  builtins
 
   PR target/65697
   * config/aarch64/aarch64.c (aarch64_split_compare_and_swap): Check
   for __sync memory models, emit initial loads and final barriers as
   appropriate.
 
 Change-Id: I65d8000c081d582246b81c7f3892c509a64b136c
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@223984 
 138bc75d-0d04-0410-961f-82ee72b054a4
 ---
  gcc/config/aarch64/aarch64.c | 18 --
  1 file changed, 16 insertions(+), 2 deletions(-)
 
 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
 index 708fc23..59d2e3a 100644
 --- a/gcc/config/aarch64/aarch64.c
 +++ b/gcc/config/aarch64/aarch64.c
 @@ -9093,14 +9093,18 @@ aarch64_split_compare_and_swap (rtx operands[])
bool is_weak;
rtx_code_label *label1, *label2;
rtx x, cond;
 +  enum memmodel model;
 +  rtx model_rtx;
  
rval = operands[0];
mem = operands[1];
oldval = operands[2];
newval = operands[3];
is_weak = (operands[4] != const0_rtx);
 +  model_rtx = operands[5];
scratch = operands[7];
mode = GET_MODE (mem);
 +  model = memmodel_from_int (INTVAL (model_rtx));
  
label1 = NULL;
if (!is_weak)
 @@ -9110,7 +9114,13 @@ aarch64_split_compare_and_swap (rtx operands[])
  }
label2 = gen_label_rtx ();
  
 -  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
 +  /* The initial load can be relaxed for a __sync operation since a final
 + barrier will be emitted to stop code hoisting.  */
 +  if (is_mm_sync (model))
 +aarch64_emit_load_exclusive (mode, rval, mem,
 +  GEN_INT (MEMMODEL_RELAXED));
 +  else
 +aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
  
cond = aarch64_gen_compare_reg (NE, rval, oldval);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
 @@ -9118,7 +9128,7 @@ aarch64_split_compare_and_swap (rtx operands[])
   gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
  
 -  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
 +  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
  
if (!is_weak)
  {
 @@ -9135,6 +9145,10 @@ aarch64_split_compare_and_swap (rtx operands[])
  }
  
emit_label (label2);
 +
 +  /* Emit any final barrier needed for a __sync operation.  */
 +  if (is_mm_sync (model))
 +aarch64_emit_post_barrier (model);
  }
  
  /* Split an atomic operation.  */
 -- 
 1.9.1

Re: [PATCH 1/4][PR target/65697][5.1] Backport stronger barriers for GCC,__sync builtins on Aarch64

2015-07-02 Thread Matthew Wahab

The first patch was approved on the gcc list: 
https://gcc.gnu.org/ml/gcc/2015-07/msg00028.html


Matthew

Re: [PATCH] fix PR46029: reimplement if conversion of loads and stores [2nd submitted version of patch]

2015-07-02 Thread Alan Lawrence


Thanks, Abe. A couple comments below...


@@ -883,7 +733,7 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,

if (flag_tree_loop_if_convert_stores)
  {
-  if (ifcvt_could_trap_p (stmt, refs))
+  if (ifcvt_could_trap_p (stmt))
{
  if (ifcvt_can_use_mask_load_store (stmt))
{
@@ -892,9 +742,17 @@ if_convertible_gimple_assign_stmt_p (gimple stmt,
  return true;
}
  if (dump_file  (dump_flags  TDF_DETAILS))
-   fprintf (dump_file, tree could trap...\n);
+   fprintf (dump_file, tree could trap\n);
  return false;
}
+
+  if (has_non_addressable_refs (stmt))
+   {
+ if (dump_file  (dump_flags  TDF_DETAILS))
+   fprintf (dump_file, has non-addressable memory references\n);
+ return false;
+   }
+
return true;
  }


As before, I'm still confused here. This still returns false, i.e. bails out of
if-conversion, if the statement could trap. Doesn't the scratchpad let us handle
that? Or do we just not care because it won't be vectorizable anyway???


@@ -1342,7 +1190,7 @@ if_convertible_loop_p_1 (struct loop *loop,
/* Check the if-convertibility of statements in predicated BBs.  */
if (!dominated_by_p (CDI_DOMINATORS, loop-latch, bb))
for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (itr))
- if (!if_convertible_stmt_p (gsi_stmt (itr), *refs,
+ if (!if_convertible_stmt_p (gsi_stmt (itr),
  any_mask_load_store))


Nit: as before - line no longer needs wrapping (a few other cases too)


@@ -2063,12 +1997,14 @@ mask_exists (int size, vecint vec)
 | end_bb_1
 |
 | bb_2
+   |   cond = some_computation;


Nit: as before - thanks for fixing the example here, but...


 |   if (cond) goto bb_3 else goto bb_4
 | end_bb_2
 |
 | bb_3
 |   cond = some_computation;


...I think you mean to remove this last too.


@@ -2817,10 +2761,26 @@ public:
  bool
  pass_if_conversion::gate (function *fun)
  {
-  return (((flag_tree_loop_vectorize || fun-has_force_vectorize_loops)
-   flag_tree_loop_if_convert != 0)
- || flag_tree_loop_if_convert == 1
- || flag_tree_loop_if_convert_stores == 1);
+  return  (
+(
+  flag_tree_loop_vectorize
+   || fun-has_force_vectorize_loops
+)
+  (
+  (
+flag_tree_loop_if_convert!= 0
+  )
+   || (
+flag_tree_loop_if_convert_stores != 0
+  )
+)
+ )
+ || (
+  flag_tree_loop_if_convert 0
+)
+ || (
+  flag_tree_loop_if_convert_stores  0
+);
  }


That is quite complex. Where can I find info on what the different flag values 
mean? (I had thought they were booleans, clearly I'm wrong, but a quick scan 
through invoke.texi doesn't seem to help; both your testcases and your updates 
to invoke.texi say e.g. -ftree-loop-if-convert-stores not 
-ftree-loop-if-convert-stores=value)



Cheers, Alan

[PATCH] Consolidate alignment folding

2015-07-02 Thread Richard Biener


This consolidates alignment folding with get_pointer_alignment_1
thereby also making it stronger, using SSA name alignment info
when available.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-07-02  Richard Biener  rguent...@suse.de

* builtins.c (get_pointer_alignment_1): Handle POINTER_PLUS_EXPR.
* fold-const.c (get_pointer_modulus_and_residue): Remove.
(fold_binary_loc): Implement (T)ptr  CST in terms of
get_pointer_alignment_1.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
Make sure to build the alignment test on a SSA name without
final alignment info valid only after the prologue.

Index: gcc/builtins.c
===
*** gcc/builtins.c  (revision 225309)
--- gcc/builtins.c  (working copy)
*** get_pointer_alignment_1 (tree exp, unsig
*** 473,478 
--- 473,500 
if (TREE_CODE (exp) == ADDR_EXPR)
  return get_object_alignment_2 (TREE_OPERAND (exp, 0),
   alignp, bitposp, true);
+   else if (TREE_CODE (exp) == POINTER_PLUS_EXPR)
+ {
+   unsigned int align;
+   unsigned HOST_WIDE_INT bitpos;
+   bool res = get_pointer_alignment_1 (TREE_OPERAND (exp, 0),
+ align, bitpos);
+   if (TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
+   bitpos += TREE_INT_CST_LOW (TREE_OPERAND (exp, 1)) * BITS_PER_UNIT;
+   else
+   {
+ unsigned int trailing_zeros = tree_ctz (TREE_OPERAND (exp, 1));
+ if (trailing_zeros  HOST_BITS_PER_INT)
+   {
+ unsigned int inner = (1U  trailing_zeros) * BITS_PER_UNIT;
+ if (inner)
+   align = MIN (align, inner);
+   }
+   }
+   *alignp = align;
+   *bitposp = bitpos  (align - 1);
+   return res;
+ }
else if (TREE_CODE (exp) == SSA_NAME
POINTER_TYPE_P (TREE_TYPE (exp)))
  {
Index: gcc/fold-const.c
===
*** gcc/fold-const.c(revision 225309)
--- gcc/fold-const.c(working copy)
*** fold_mult_zconjz (location_t loc, tree t
*** 9350,9432 
  }
  
  
- /* Subroutine of fold_binary.  If P is the value of EXPR, computes
-power-of-two M and (arbitrary) N such that M divides (P-N).  This condition
-guarantees that P and N have the same least significant log2(M) bits.
-N is not otherwise constrained.  In particular, N is not normalized to
-0 = N  M as is common.  In general, the precise value of P is unknown.
-M is chosen as large as possible such that constant N can be determined.
- 
-Returns M and sets *RESIDUE to N.
- 
-If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into
-account.  This is not always possible due to PR 35705.
-  */
- 
- static unsigned HOST_WIDE_INT
- get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue,
-bool allow_func_align)
- {
-   enum tree_code code;
- 
-   *residue = 0;
- 
-   code = TREE_CODE (expr);
-   if (code == ADDR_EXPR)
- {
-   unsigned int bitalign;
-   get_object_alignment_1 (TREE_OPERAND (expr, 0), bitalign, residue);
-   *residue /= BITS_PER_UNIT;
-   return bitalign / BITS_PER_UNIT;
- }
-   else if (code == POINTER_PLUS_EXPR)
- {
-   tree op0, op1;
-   unsigned HOST_WIDE_INT modulus;
-   enum tree_code inner_code;
- 
-   op0 = TREE_OPERAND (expr, 0);
-   STRIP_NOPS (op0);
-   modulus = get_pointer_modulus_and_residue (op0, residue,
-allow_func_align);
- 
-   op1 = TREE_OPERAND (expr, 1);
-   STRIP_NOPS (op1);
-   inner_code = TREE_CODE (op1);
-   if (inner_code == INTEGER_CST)
-   {
- *residue += TREE_INT_CST_LOW (op1);
- return modulus;
-   }
-   else if (inner_code == MULT_EXPR)
-   {
- op1 = TREE_OPERAND (op1, 1);
- if (TREE_CODE (op1) == INTEGER_CST)
-   {
- unsigned HOST_WIDE_INT align;
- 
- /* Compute the greatest power-of-2 divisor of op1.  */
- align = TREE_INT_CST_LOW (op1);
- align = -align;
- 
- /* If align is non-zero and less than *modulus, replace
-*modulus with align., If align is 0, then either op1 is 0
-or the greatest power-of-2 divisor of op1 doesn't fit in an
-unsigned HOST_WIDE_INT.  In either case, no additional
-constraint is imposed.  */
- if (align)
-   modulus = MIN (modulus, align);
- 
- return modulus;
-   }
-   }
- }
- 
-   /* If we get here, we were unable to determine anything useful about the
-  expression.  */
-   return 1;
- }
- 
  /* Helper function for fold_vec_perm.  Store elements of VECTOR_CST or

Re: [PATCH][RFC] Add FRE in pass_vectorize

2015-07-02 Thread Alan Lawrence


Jeff Law wrote:

On 06/24/2015 01:59 AM, Richard Biener wrote:

And then there is the possibility of making passes generate less
needs to perform cleanups after them - like in the present case
with the redundant IVs make them more apparently redundant by
CSEing the initial value and step during vectorizer code generation.
I'm playing with the idea of adding a simple CSE machinery to
the gimple_build () interface (aka match-and-simplify).  It
eventually invokes (well, not currently, but that can be fixed)
maybe_push_res_to_seq which is a good place to maintain a
table of already generated expressions.  That of course only
works if you either always append to the same sequence or at least
insert at the same place.
As you know we've gone back and forth on this in the past.  It's always 
a trade-off.  I still ponder from time to time putting the simple CSE 
and cprop bits back into the SSA rewriting phase to avoid generating all 
kinds of garbage that just needs to be cleaned up later -- particularly 
for incremental SSA updates.


Coming to this rather late, and without the background knowledge about having 
gone back and forth, sorry! But what are the arguments against this? Am I right 
in thinking that the SSA Rewriting phase would not trigger as often as 
gimple_build(), or are these the same thing?


Presumably when you say simple CSE machinery you'd have to bail out quickly 
from tricky cases like, say:


if (P)
  {
use ...expr...
  }
...
if (Q)
  {
now building a new ...expr... here
  }

Thanks, Alan

[PATCH 2/4][ARM][PR target/65697][5.1] Backport stronger barriers for __sync,compare-and-swap builtins.

2015-07-02 Thread Matthew Wahab


This patch backports the changes made to strengthen the barriers emitted for
the __sync compare-and-swap builtins.

The trunk patch submission is at
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01411.html
The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg01236.html

Tested the series for arm-none-linux-gnueabihf with check-gcc

Ok for the branch?
Matthew

2015-07-02  Matthew Wahab  matthew.wa...@arm.com

Backport from trunk:
2015-06-29  Matthew Wahab  matthew.wa...@arm.com

PR target/65697
* config/arm/arm.c (arm_split_compare_and_swap): For ARMv8,
replace an initial acquire barrier with final barrier.

From fce96ded43d39847473b67e54e3146924c703f40 Mon Sep 17 00:00:00 2001
From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
Date: Mon, 29 Jun 2015 16:09:10 +
Subject: [PATCH 2/4] 2015-07-01  Matthew Wahab  matthew.wa...@arm.com

	Backport
	PR target/65697
	* config/arm/arm.c (arm_split_compare_and_swap): For ARMv8, replace an
	initial acquire barrier with final barrier.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@225133 138bc75d-0d04-0410-961f-82ee72b054a4

Conflicts:
	gcc/ChangeLog

Change-Id: Ifab505d792d6227c7d2231813dfb2e7826f0f450
---
 gcc/config/arm/arm.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index b36dfb0..49005fb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -27742,6 +27742,8 @@ arm_split_compare_and_swap (rtx operands[])
   scratch = operands[7];
   mode = GET_MODE (mem);
 
+  bool is_armv8_sync = arm_arch8  is_mm_sync (mod_s);
+
   bool use_acquire = TARGET_HAVE_LDACQ
   !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
 			  || is_mm_release (mod_s));
@@ -27750,6 +27752,11 @@ arm_split_compare_and_swap (rtx operands[])
   !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
 			  || is_mm_acquire (mod_s));
 
+  /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
+ a full barrier is emitted after the store-release.  */
+  if (is_armv8_sync)
+use_acquire = false;
+
   /* Checks whether a barrier is needed and emits one accordingly.  */
   if (!(use_acquire || use_release))
 arm_pre_atomic_barrier (mod_s);
@@ -27790,7 +27797,8 @@ arm_split_compare_and_swap (rtx operands[])
 emit_label (label2);
 
   /* Checks whether a barrier is needed and emits one accordingly.  */
-  if (!(use_acquire || use_release))
+  if (is_armv8_sync
+  || !(use_acquire || use_release))
 arm_post_atomic_barrier (mod_s);
 
   if (is_mm_relaxed (mod_f))
-- 
1.9.1

[PATCH][11/n] Remove GENERIC stmt combining from SCCVN

2015-07-02 Thread Richard Biener


This moves floating-point related comparison foldings from
fold_comparison to match.pd.  I noticed we call fold_comparison
only for a subset of tcc_comparison - the newly introduced
simple_comparison operator list and changes to present patterns
reflect that.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2015-07-02  Richard Biener  rguent...@suse.de

* fold-const.c (fold_mathfn_compare): Remove.
(fold_inf_compare): Likewise.
(fold_comparison): Move floating point comparison simplifications...
* match.pd: ... to patterns here.  Introduce simple_comparisons
operator list and use it for patterns formerly in fold_comparison.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 225305)
+++ gcc/fold-const.c(working copy)
@@ -145,10 +145,6 @@ static tree fold_binary_op_with_conditio
 enum tree_code, tree,
 tree, tree,
 tree, tree, int);
-static tree fold_mathfn_compare (location_t,
-enum built_in_function, enum tree_code,
-tree, tree, tree);
-static tree fold_inf_compare (location_t, enum tree_code, tree, tree, tree);
 static tree fold_div_compare (location_t, enum tree_code, tree, tree, tree);
 static bool reorder_operands_p (const_tree, const_tree);
 static tree fold_negate_const (tree, tree);
@@ -6418,199 +6414,6 @@ fold_real_zero_addition_p (const_tree ty
   return negate  !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type));
 }
 
-/* Subroutine of fold() that checks comparisons of built-in math
-   functions against real constants.
-
-   FCODE is the DECL_FUNCTION_CODE of the built-in, CODE is the comparison
-   operator: EQ_EXPR, NE_EXPR, GT_EXPR, LT_EXPR, GE_EXPR or LE_EXPR.  TYPE
-   is the type of the result and ARG0 and ARG1 are the operands of the
-   comparison.  ARG1 must be a TREE_REAL_CST.
-
-   The function returns the constant folded tree if a simplification
-   can be made, and NULL_TREE otherwise.  */
-
-static tree
-fold_mathfn_compare (location_t loc,
-enum built_in_function fcode, enum tree_code code,
-tree type, tree arg0, tree arg1)
-{
-  REAL_VALUE_TYPE c;
-
-  if (BUILTIN_SQRT_P (fcode))
-{
-  tree arg = CALL_EXPR_ARG (arg0, 0);
-  machine_mode mode = TYPE_MODE (TREE_TYPE (arg0));
-
-  c = TREE_REAL_CST (arg1);
-  if (REAL_VALUE_NEGATIVE (c))
-   {
- /* sqrt(x)  y is always false, if y is negative.  */
- if (code == EQ_EXPR || code == LT_EXPR || code == LE_EXPR)
-   return omit_one_operand_loc (loc, type, integer_zero_node, arg);
-
- /* sqrt(x)  y is always true, if y is negative and we
-don't care about NaNs, i.e. negative values of x.  */
- if (code == NE_EXPR || !HONOR_NANS (mode))
-   return omit_one_operand_loc (loc, type, integer_one_node, arg);
-
- /* sqrt(x)  y is the same as x = 0, if y is negative.  */
- return fold_build2_loc (loc, GE_EXPR, type, arg,
- build_real (TREE_TYPE (arg), dconst0));
-   }
-  else if (code == GT_EXPR || code == GE_EXPR)
-   {
- REAL_VALUE_TYPE c2;
-
- REAL_ARITHMETIC (c2, MULT_EXPR, c, c);
- real_convert (c2, mode, c2);
-
- if (REAL_VALUE_ISINF (c2))
-   {
- /* sqrt(x)  y is x == +Inf, when y is very large.  */
- if (HONOR_INFINITIES (mode))
-   return fold_build2_loc (loc, EQ_EXPR, type, arg,
-   build_real (TREE_TYPE (arg), c2));
-
- /* sqrt(x)  y is always false, when y is very large
-and we don't care about infinities.  */
- return omit_one_operand_loc (loc, type, integer_zero_node, arg);
-   }
-
- /* sqrt(x)  c is the same as x  c*c.  */
- return fold_build2_loc (loc, code, type, arg,
- build_real (TREE_TYPE (arg), c2));
-   }
-  else if (code == LT_EXPR || code == LE_EXPR)
-   {
- REAL_VALUE_TYPE c2;
-
- REAL_ARITHMETIC (c2, MULT_EXPR, c, c);
- real_convert (c2, mode, c2);
-
- if (REAL_VALUE_ISINF (c2))
-   {
- /* sqrt(x)  y is always true, when y is a very large
-value and we don't care about NaNs or Infinities.  */
- if (! HONOR_NANS (mode)  ! HONOR_INFINITIES (mode))
-   return omit_one_operand_loc (loc, type, integer_one_node, arg);
-
- /* sqrt(x)  y is x != +Inf when y is very large and we
-don't care about NaNs.  */
- if (! HONOR_NANS (mode))
-   return fold_build2_loc (loc, NE_EXPR, type, arg,
-   build_real (TREE_TYPE (arg), c2));

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Richard Biener

On Thu, Jul 2, 2015 at 3:05 PM, Richard Biener
richard.guent...@gmail.com wrote:
 On Thu, Jul 2, 2015 at 8:18 AM, Marc Glisse marc.gli...@inria.fr wrote:
 On Thu, 2 Jul 2015, Jason Merrill wrote:

 On 07/02/2015 12:10 AM, Jim Wilson wrote:

 This is a known gmp problem, documented in PR56019 and in
  https://gcc.gnu.org/gcc-4.9/porting_to.html
 near the bottom where it discusses cstddef.h changes.


 This document also says that A workaround until libraries get updated is
 to include cstddef or stddef.h before any headers from that library.

 Can you try modifying the graphite* files accordingly?


 See also

 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65988#c1

 system.h looks like the right way to include gmp.h in gcc, currently.

 Yeah, I _think_ the fix is to move the isl includes below the system.h
 include.

And it should only affect the files including isl/val_gmp.h which includes
gmp.h.

 Richard.

 --
 Marc Glisse

[Patch ARM-AArch64/testsuite] Neon intrinsics: vget_lane

2015-07-02 Thread Christophe Lyon

Hi,

Here is the missing test for ARM/AArch64 AdvSIMD intrinsic: vget_lane.

Tested on arm, armeb, aarch64 and aarch64_be targets (using QEMU).

The tests all pass, except on armeb where vgetq_lane_s64 and
vgetq_lane_u64 fail. I haven't investigated in detail yet.

OK for trunk?

2015-07-02  Christophe Lyon  christophe.l...@linaro.org

* gcc.target/aarch64/advsimd-intrinsics/vget_lane.c: New testcase.


diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
new file mode 100644
index 000..5806050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
@@ -0,0 +1,125 @@
+#include arm_neon.h
+#include arm-neon-ref.h
+#include compute-ref-data.h
+
+/* Expected results.  */
+int8_t expected_s8   = 0xf7;
+int16_texpected_s16  = 0xfff3;
+int32_texpected_s32  = 0xfff1;
+int64_texpected_s64  = 0xfff0;
+uint8_texpected_u8   = 0xf6;
+uint16_t   expected_u16  = 0xfff2;
+uint32_t   expected_u32  = 0xfff1;
+uint64_t   expected_u64  = 0xfff0;
+poly8_texpected_p8   = 0xf6;
+poly16_t   expected_p16  = 0xfff2;
+hfloat32_t expected_f32  = 0xc170;
+
+int8_t expectedq_s8  = 0xff;
+int16_texpectedq_s16 = 0xfff5;
+int32_texpectedq_s32 = 0xfff3;
+int64_texpectedq_s64 = 0xfff1;
+uint8_texpectedq_u8  = 0xfe;
+uint16_t   expectedq_u16 = 0xfff6;
+uint32_t   expectedq_u32 = 0xfff2;
+uint64_t   expectedq_u64 = 0xfff1;
+poly8_texpectedq_p8  = 0xfe;
+poly16_t   expectedq_p16 = 0xfff6;
+hfloat32_t expectedq_f32 = 0xc150;
+
+int error_found = 0;
+
+#define TEST_MSG VGET_LANE
+void exec_vget_lane (void)
+{
+  /* vec=vget_lane(vec, lane), then store the result.  */
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)\
+  VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+  if (VAR(var, T1, W) != expected##Q##_##T2##W) { \
+fprintf(stderr,   \
+   ERROR in %s (%s line %d in result '%s') at type %s   \
+   got 0x% PRIx##W  != 0x% PRIx##W \n,  \
+   TEST_MSG, __FILE__, __LINE__,  \
+   STR(expected##Q##_##T2##W),\
+   STR(VECT_NAME(T1, W, N)),  \
+   VAR(var, T1, W),   \
+   expected##Q##_##T2##W);\
+error_found = 1;  \
+  }
+
+  /* Special variant for floating-point.  */
+  union {
+uint32_t var_int32;
+float32_t var_float32;
+  } var_int32_float32;
+
+#define TEST_VGET_LANE_FP(Q, T1, T2, W, N, L) \
+  VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+  var_int##W##_float##W.var_float##W = VAR(var, T1, W);
   \
+  if (var_int##W##_float##W.var_int##W != expected##Q##_##T2##W) {\
+fprintf(stderr,   \
+   ERROR in %s (%s line %d in result '%s') at type %s   \
+   got 0x% PRIx##W  != 0x% PRIx##W \n,  \
+   TEST_MSG, __FILE__, __LINE__,  \
+   STR(expected##Q##_##T2##W),\
+   STR(VECT_NAME(T1, W, N)),  \
+   var_int##W##_float##W.var_int##W,  \
+   expected##Q##_##T2##W);\
+error_found = 1;  \
+  }
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  /* Scalar variables.  */
+  VAR_DECL(var, int, 8);
+  VAR_DECL(var, int, 16);
+  VAR_DECL(var, int, 32);
+  VAR_DECL(var, int, 64);
+  VAR_DECL(var, uint, 8);
+  VAR_DECL(var, uint, 16);
+  VAR_DECL(var, uint, 32);
+  VAR_DECL(var, uint, 64);
+  VAR_DECL(var, poly, 8);
+  VAR_DECL(var, poly, 16);
+  VAR_DECL(var, float, 32);
+
+  /* Initialize input values.  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VGET_LANE(, int, s, 8, 8, 7);
+  TEST_VGET_LANE(, int, s, 16, 4, 3);
+  TEST_VGET_LANE(, int, s, 32, 2, 1);
+  TEST_VGET_LANE(, int, s, 64, 1, 0);
+  TEST_VGET_LANE(, uint, u, 8, 8, 6);
+  TEST_VGET_LANE(, uint, u, 16, 4, 2);
+  TEST_VGET_LANE(, uint, u, 32, 2, 1);
+  TEST_VGET_LANE(, uint, u, 64, 1, 0);
+  TEST_VGET_LANE(, poly, p, 8, 8, 6);
+  TEST_VGET_LANE(, poly, p, 16, 4, 2);
+  TEST_VGET_LANE_FP(, float, f, 32, 2, 1);
+
+  TEST_VGET_LANE(q, int, s, 8, 16, 15);
+  TEST_VGET_LANE(q, int, s, 16, 8, 5);
+

Re: [PATCH] c/66516 - missing diagnostic on taking the address of a builtin function

2015-07-02 Thread Joseph Myers

On Sun, 28 Jun 2015, Martin Sebor wrote:

 2015-06-28  Martin Sebor  mse...@redhat.com
 
   pr c/66516
   * c-tree.h (c_validate_addressable): New function.
   * c-typeck.c (convert_arguments, parser_build_unary_op): Call it.
   (build_conditional_expr, c_cast_expr, convert_for_assignment): Same.
   (build_binary_op, c_objc_common_truthvalue_conversion): Same.
   (c_validate_addressable): Define function.

I don't think c_validate_addressable is a good name - given that it's 
called for lots of things that aren't addressable, in contexts in which 
there is no need for them to be addressable, and doesn't do checks of 
addressability in contexts where they are actually needed and done 
elsewhere (e.g. checks for not taking the address of a register variable).  
The question seems to be something more like: is the expression used as an 
operand something it's OK to use as an operand at all?

What is the logic for the list of functions above being a complete list of 
the places that need changes?

 @@ -4477,11 +4486,22 @@ build_conditional_expr (location_t colon_loc, tree 
 ifexp, bool ifexp_bcp,
|| TREE_CODE (TREE_TYPE (op2)) == ERROR_MARK)
  return error_mark_node;
 
 +  if (TREE_CODE (TREE_TYPE (ifexp)) == POINTER_TYPE
 +   !c_validate_addressable (ifexp,
 +   EXPR_LOCATION (TREE_OPERAND (ifexp, 0
 +return error_mark_node;

How can ifexp be of pointer type?  It's undergone truthvalue conversion 
and should always be of type int at this point (but in any case, you can't 
refer to TREE_OPERAND (ifexp, 0) without knowing what sort of expression 
it is).

 +/* For EXPR that is an ADDR_EXPR or whose type is a FUNCTION_TYPE,
 +   determines whether its operand can have its address taken issues
 +   an error pointing to the location LOC.
 +   Operands that cannot have their address taken are builtin functions
 +   that have no library fallback (no other kinds of expressions are
 +   considered).
 +   Returns true when the expression can have its address taken and
 +   false otherwise.  */

Apart from the naming issue, the comment says nothing about the semantics 
of the function for an argument that's not of that form.

 +  error_at (loc, builtin functions must be directly called);

built-in (see codingconventions.html).

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH v2] Rerun loop-header-copying just before vectorization

2015-07-02 Thread Alan Lawrence


With those comment fixes, this is OK for the trunk.

jeff


Thank you for review - I've pushed r225311 with what I hope are appropriate 
comment fixes.


Cheers, Alan

Re: [PATCH][RFC] Add FRE in pass_vectorize

2015-07-02 Thread Richard Biener

On Thu, 2 Jul 2015, Alan Lawrence wrote:

 Jeff Law wrote:
  On 06/24/2015 01:59 AM, Richard Biener wrote:
   And then there is the possibility of making passes generate less
   needs to perform cleanups after them - like in the present case
   with the redundant IVs make them more apparently redundant by
   CSEing the initial value and step during vectorizer code generation.
   I'm playing with the idea of adding a simple CSE machinery to
   the gimple_build () interface (aka match-and-simplify).  It
   eventually invokes (well, not currently, but that can be fixed)
   maybe_push_res_to_seq which is a good place to maintain a
   table of already generated expressions.  That of course only
   works if you either always append to the same sequence or at least
   insert at the same place.
  As you know we've gone back and forth on this in the past.  It's always a
  trade-off.  I still ponder from time to time putting the simple CSE and
  cprop bits back into the SSA rewriting phase to avoid generating all kinds
  of garbage that just needs to be cleaned up later -- particularly for
  incremental SSA updates.
 
 Coming to this rather late, and without the background knowledge about having
 gone back and forth, sorry! But what are the arguments against this? Am I
 right in thinking that the SSA Rewriting phase would not trigger as often as
 gimple_build(), or are these the same thing?

Not sure what Jeff means here either.  We don't do any SSA rewriting
nowadays but instead produce SSA gimple even from force_gimple_operand
operating on GENERIC.

As the goal is to get rid of force_gimple_operand (late gimplifying)
this means to build the CSE into its replacement.

 Presumably when you say simple CSE machinery you'd have to bail out quickly
 from tricky cases like, say:
 
 if (P)
   {
 use ...expr...
   }
 ...
 if (Q)
   {
 now building a new ...expr... here
   }

Yes, of course.

Richard.

 Thanks, Alan
 
 

-- 
Richard Biener rguent...@suse.de
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
Norton, HRB 21284 (AG Nuernberg)

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Richard Biener

On Thu, Jul 2, 2015 at 8:18 AM, Marc Glisse marc.gli...@inria.fr wrote:
 On Thu, 2 Jul 2015, Jason Merrill wrote:

 On 07/02/2015 12:10 AM, Jim Wilson wrote:

 This is a known gmp problem, documented in PR56019 and in
  https://gcc.gnu.org/gcc-4.9/porting_to.html
 near the bottom where it discusses cstddef.h changes.


 This document also says that A workaround until libraries get updated is
 to include cstddef or stddef.h before any headers from that library.

 Can you try modifying the graphite* files accordingly?


 See also

 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65988#c1

 system.h looks like the right way to include gmp.h in gcc, currently.

Yeah, I _think_ the fix is to move the isl includes below the system.h
include.

Richard.

 --
 Marc Glisse

[PATCH 1/4][ARM][PR target/65697][5.1] Backport stronger barriers for __sync fetch-op builtins.

2015-07-02 Thread Matthew Wahab


The __sync builtins are implemented using barriers that are too weak for ARMv8
targets; this has been fixed on trunk for the ARM back-end. Since GCC-5.1 also
generates the incorrect code, it should be fixed there as well.

This patch backports the changes made to strengthen the barriers emitted for
the __sync fetch-and-op/op-and-fetch builtins.

The trunk patch submission is at
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01410.html
The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg01235.html

Tested the series for arm-none-linux-gnueabihf with check-gcc

Ok for the branch?
Matthew

2015-07-02  Matthew Wahab  matthew.wa...@arm.com

Backport from trunk:
2015-06-29  Matthew Wahab  matthew.wa...@arm.com

PR target/65697
* config/arm/arm.c (arm_split_atomic_op): For ARMv8, replace an
initial acquire barrier with final barrier.
From e7150c85313fff08153197493db568ca8fe2778a Mon Sep 17 00:00:00 2001
From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
Date: Mon, 29 Jun 2015 16:03:34 +
Subject: [PATCH 1/4] 2015-07-01  Matthew Wahab  matthew.wa...@arm.com

Backport
	PR target/65697
	* config/arm/arm.c (arm_split_atomic_op): For ARMv8, replace an
	initial acquire barrier with final barrier.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@225132 138bc75d-0d04-0410-961f-82ee72b054a4

Conflicts:
	gcc/ChangeLog

Change-Id: I2074541794ecad8847ada04690cd9132a51b6404
---
 gcc/config/arm/arm.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7b279c7..b36dfb0 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -27807,6 +27807,8 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   rtx_code_label *label;
   rtx x;
 
+  bool is_armv8_sync = arm_arch8  is_mm_sync (model);
+
   bool use_acquire = TARGET_HAVE_LDACQ
   !(is_mm_relaxed (model) || is_mm_consume (model)
 			  || is_mm_release (model));
@@ -27815,6 +27817,11 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   !(is_mm_relaxed (model) || is_mm_consume (model)
 			  || is_mm_acquire (model));
 
+  /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
+ a full barrier is emitted after the store-release.  */
+  if (is_armv8_sync)
+use_acquire = false;
+
   /* Checks whether a barrier is needed and emits one accordingly.  */
   if (!(use_acquire || use_release))
 arm_pre_atomic_barrier (model);
@@ -27885,7 +27892,8 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
 
   /* Checks whether a barrier is needed and emits one accordingly.  */
-  if (!(use_acquire || use_release))
+  if (is_armv8_sync
+  || !(use_acquire || use_release))
 arm_post_atomic_barrier (model);
 }
 
-- 
1.9.1

[PATCH][12/n] Remove GENERIC stmt combining from SCCVN

2015-07-02 Thread Richard Biener


This moves the alignment folding to a match.pd pattern (it's
surprising how often the old one triggered via SCCVN stmt combining).

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-07-02  Richard Biener  rguent...@suse.de

* fold-const.c (fold_binary_loc): Move (T)ptr & CST folding...
* match.pd: ... here.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 225312)
+++ gcc/fold-const.c(working copy)
@@ -11069,25 +10729,6 @@ fold_binary_loc (location_t loc,
  fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0));
}
 
-  /* If arg0 is derived from the address of an object or function, we may
-be able to fold this expression using the object or function's
-alignment.  */
-  if (POINTER_TYPE_P (TREE_TYPE (arg0))  TREE_CODE (arg1) == INTEGER_CST)
-   {
- unsigned int align;
- unsigned HOST_WIDE_INT bitpos;
-
- get_pointer_alignment_1 (arg0, align, bitpos);
-
- /* This works because modulus is a power of 2.  If this weren't the
-case, we'd have to replace it by its greatest power-of-2
-divisor: modulus  -modulus.  */
- if (wi::ltu_p (arg1, align / BITS_PER_UNIT))
-   return wide_int_to_tree (type,
-wi::bit_and (arg1,
- bitpos / BITS_PER_UNIT));
-   }
-
   goto associate;
 
 case RDIV_EXPR:
Index: gcc/match.pd
===
--- gcc/match.pd(revision 225312)
+++ gcc/match.pd(working copy)
@@ -668,6 +688,21 @@ (define_operator_list swapped_tcc_compar
(if (ptr_difference_const (@0, @1, diff))
 { build_int_cst_type (type, diff); }
 
+/* If arg0 is derived from the address of an object or function, we may
+   be able to fold this expression using the object or function's
+   alignment.  */
+(simplify
+ (bit_and (convert? @0) INTEGER_CST@1)
+ (if (POINTER_TYPE_P (TREE_TYPE (@0))
+   tree_nop_conversion_p (type, TREE_TYPE (@0)))
+  (with
+   {
+ unsigned int align;
+ unsigned HOST_WIDE_INT bitpos;
+ get_pointer_alignment_1 (@0, align, bitpos);
+   }
+   (if (wi::ltu_p (@1, align / BITS_PER_UNIT))
+{ wide_int_to_tree (type, wi::bit_and (@1, bitpos / BITS_PER_UNIT)); }
 
 
 /* We can't reassociate at all for saturating types.  */

Re: [PATCH 01/13] recog: Increased max number of alternatives - v2

2015-07-02 Thread Andreas Krebbel

On Mon, Jun 08, 2015 at 03:38:03PM +0200, Jakub Jelinek wrote:
 On Mon, Jun 08, 2015 at 03:32:50PM +0200, Andreas Krebbel wrote:
  On 06/01/2015 10:22 AM, Jakub Jelinek wrote:
   On Fri, May 22, 2015 at 09:54:00AM +0200, Andreas Krebbel wrote:
   On Tue, May 19, 2015 at 10:40:26AM +0200, Andreas Krebbel wrote:
   On 05/18/2015 04:19 PM, Richard Biener wrote:
   Please use uint64_t instead.
  
   Done. Ok with that change?
  
   I've applied the following patch.
   
   Note that on current trunk cross compiler from x86_64-linux to
   s390x-linux (admittedly just make cc1 of an older configured tree,
   but with libcpp (normal and build) rebuilt) fails miserably with
   genattrtab: invalid alternative specified for pattern number 1015
   
* recog.h: Increase MAX_RECOG_ALTERNATIVES.
Change type of alternative_mask to uint64_t.
   
   From quick look at genattrtab.c, there are many further spots
   which rely on MAX_RECOG_ALTERNATIVES fitting into int bits.
   
   With this quick patch make cc1 at least succeeds, but no idea whether
   I've caught all the spots which work with bitmasks of alternatives.
  
  I've regtested your patch on S/390 without seeing any problems. Could you 
  please commit it to mainline?
 
 Ok, I will.  Have you looked around if these are all the spots
 that need changing for this in the gen* tools?
 Perhaps trying -fsanitize=undefined and/or valgrind.  I admit I haven't
 spent too much time on it.

Could you please apply this to GCC 5 branch as well? I'm about to
apply the z13 backports now.

Bye,

-Andreas-

RE: [Patch] Add support for IEEE-conformant versions of scalar fmin* and fmax*

2015-07-02 Thread Joseph Myers

On Thu, 2 Jul 2015, David Sherwood wrote:

 
  On Mon, 29 Jun 2015, David Sherwood wrote:
  
   Hi,
  
   I have added new STRICT_MAX_EXPR and STRICT_MIN_EXPR expressions to 
   support the
   IEEE versions of fmin and fmax. This is done by recognising the math 
   library
   fmax and fmin builtin functions in a similar way to how this is done 
   for
   -ffast-math. This also allows us to vectorise the IEEE max/min functions 
   for
   targets that support it, for example aarch64/aarch32.
  
  This patch is missing documentation.  You need to document the new insn
  patterns in md.texi and the new tree codes in generic.texi.
 
 Hi, I've uploaded a new patch with the documentation. Hope this is ok.

In various places where you refer to one operand being NaN, I think you 
mean one operand being a *quiet* NaN (if one is a signaling NaN - only 
supported by GCC if -fsignaling-nans - the IEEE minNum and maxNum 
operations raise invalid and return a quiet NaN).

-- 
Joseph S. Myers
jos...@codesourcery.com

[PATCH 4/4][ARM][PR target/65697][5.1] Fix tests for __sync_builtins.

2015-07-02 Thread Matthew Wahab


This patch backports fixes for the __sync builtin tests.

The trunk patch submission is at
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00031.html
The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-07/msg00025.html

Tested the series for arm-none-linux-gnueabihf with check-gcc

Ok for the branch?
Matthew

gcc/testsuite
2015-07-02  Matthew Wahab  matthew.wa...@arm.com

Backport from trunk:
2015-07-01  Matthew Wahab  matthew.wa...@arm.com

* gcc.target/arm/armv8-sync-comp-swap.c: Replace
'do-require-effective-target' with 'dg-require-effective-target'.
* gcc.target/arm/armv8-sync-op-full.c: Likewise.
* gcc.target/arm/armv8-sync-op-release.c: Likewise.
* gcc.target/arm/armv8-sync-op-acquire.c: Likewise.  Also, replace
'stlex' with 'strex' as the expected output.

From 3e48ba2ea1848ca9d17d627ebbdc9c6fb2e8c21b Mon Sep 17 00:00:00 2001
From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
Date: Wed, 1 Jul 2015 12:16:01 +
Subject: [PATCH 4/4] 2015-07-01  Matthew Wahab  matthew.wa...@arm.com

	Backport
	* gcc.target/arm/armv8-sync-comp-swap.c: Replace
	'do-require-effective-target' with 'dg-require-effective-target'.
	* gcc.target/arm/armv8-sync-op-full.c: Likewise.
	* gcc.target/arm/armv8-sync-op-release.c: Likewise.
	* gcc.target/arm/armv8-sync-op-acquire.c: Likewise.  Also, replace
'stlex' with 'strex' as the expected output.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@225241 138bc75d-0d04-0410-961f-82ee72b054a4

Conflicts:
	gcc/testsuite/ChangeLog

Change-Id: I19f2013f7bdd2dd035f36f0f7c9829cf6a86fb8e
---
 gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c  | 2 +-
 gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c | 4 ++--
 gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c| 2 +-
 gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c b/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
index f96c81a..0e95986 100644
--- a/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-require-effective-target arm_arch_v8a_ok } */
 /* { dg-options -O2 } */
 /* { dg-add-options arm_arch_v8a } */
 
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
index 8d6659b..c448599 100644
--- a/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
@@ -1,10 +1,10 @@
 /* { dg-do compile } */
-/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-require-effective-target arm_arch_v8a_ok } */
 /* { dg-options -O2 } */
 /* { dg-add-options arm_arch_v8a } */
 
 #include ../aarch64/sync-op-acquire.x
 
 /* { dg-final { scan-assembler-times ldrex 1 } } */
-/* { dg-final { scan-assembler-times stlex 1 } } */
+/* { dg-final { scan-assembler-times strex 1 } } */
 /* { dg-final { scan-assembler-times dmb 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
index a5ad3bd..cce9e00 100644
--- a/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-require-effective-target arm_arch_v8a_ok } */
 /* { dg-options -O2 } */
 /* { dg-add-options arm_arch_v8a } */
 
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
index 0d3be7b..502a266 100644
--- a/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-require-effective-target arm_arch_v8a_ok } */
 /* { dg-options -O2 } */
 /* { dg-add-options arm_arch_v8a } */
 
-- 
1.9.1

[PATCH] combine: Fix an oversight in make_compound_operation (PR66706)

2015-07-02 Thread Segher Boessenkool

Combine expands things like
  (zero_extend:DI (lshiftrt:SI (reg:SI) (reg:SI)))
to
  (and:DI (subreg:DI (lshiftrt:SI (reg:SI) (reg:SI)) 0)
  (const_int 0x))
to do simplifications on, and then make_compound_operation is supposed
to transform it back to the simpler form (with the zero_extend).

But it doesn't; it tries to make a zero_extract and then gives up.

This fixes it.  Bootstrapped and regression tested on powerpc64-linux
(-m32,-m32/-mpowerpc64,-m64,-m64/-mlra); no regressions.  Committing.


Segher


2015-07-02  Segher Boessenkool  seg...@kernel.crashing.org

PR rtl-optimization/66706
* combine.c (make_compound_operation): If an AND of SUBREG of
LSHIFTRT does not simplify, see if just the AND of SUBREG does.

---
 gcc/combine.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/gcc/combine.c b/gcc/combine.c
index 8eaae7c..b97aa10 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7893,6 +7893,15 @@ make_compound_operation (rtx x, enum rtx_code in_code)
  new_rtx = make_extraction (GET_MODE (SUBREG_REG (XEXP (x, 0))), 
new_rtx, 0,
 XEXP (SUBREG_REG (XEXP (x, 0)), 1), i, 1,
 0, in_code == COMPARE);
+
+ /* If that didn't give anything, see if the AND simplifies on
+its own.  */
+ if (!new_rtx  i = 0)
+   {
+ new_rtx = make_compound_operation (XEXP (x, 0), next_code);
+ new_rtx = make_extraction (mode, new_rtx, 0, NULL_RTX, i, 1,
+0, in_code == COMPARE);
+   }
}
   /* Same as previous, but for (xor/ior (lshiftrt...) (lshiftrt...)).  */
   else if ((GET_CODE (XEXP (x, 0)) == XOR
-- 
1.8.1.4

[PATCH 3/4][ARM][PR target/65697][5.1] Add tests for __sync_builtins.

2015-07-02 Thread Matthew Wahab


This patch backports the tests added for code generated by the ARM back-end for
the __sync builtins.

The trunk patch submission is at
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01412.html
The commit is at https://gcc.gnu.org/ml/gcc-cvs/2015-06/msg01237.html

Tested the series for arm-none-linux-gnueabihf with check-gcc

Ok for the branch?
Matthew

gcc/testsuite
2015-07-02  Matthew Wahab  matthew.wa...@arm.com

Backport from trunk:
2015-06-29  Matthew Wahab  matthew.wa...@arm.com

PR target/65697
* gcc.target/arm/armv8-sync-comp-swap.c: New.
* gcc.target/arm/armv8-sync-op-acquire.c: New.
* gcc.target/arm/armv8-sync-op-full.c: New.
* gcc.target/arm/armv8-sync-op-release.c: New.

From 27eb0d570c831be52f4d544bf40abe8a6a4246c3 Mon Sep 17 00:00:00 2001
From: mwahab mwahab@138bc75d-0d04-0410-961f-82ee72b054a4
Date: Mon, 29 Jun 2015 16:12:12 +
Subject: [PATCH 3/4] 2015-07-01  Matthew Wahab  matthew.wa...@arm.com

	Backport
	PR target/65697
	* gcc.target/arm/armv8-sync-comp-swap.c: New.
	* gcc.target/arm/armv8-sync-op-acquire.c: New.
	* gcc.target/arm/armv8-sync-op-full.c: New.
	* gcc.target/arm/armv8-sync-op-release.c: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@225134 138bc75d-0d04-0410-961f-82ee72b054a4

Conflicts:
	gcc/ChangeLog

Change-Id: I16c02786765bbbfbb287fba863ba27fb6a56ddc5
---
 gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c  | 10 ++
 gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c | 10 ++
 gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c| 10 ++
 gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c |  8 
 4 files changed, 38 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
 create mode 100644 gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
 create mode 100644 gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
 create mode 100644 gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c

diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c b/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
new file mode 100644
index 000..f96c81a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-options -O2 } */
+/* { dg-add-options arm_arch_v8a } */
+
+#include ../aarch64/sync-comp-swap.x
+
+/* { dg-final { scan-assembler-times ldrex 2 } } */
+/* { dg-final { scan-assembler-times stlex 2 } } */
+/* { dg-final { scan-assembler-times dmb 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
new file mode 100644
index 000..8d6659b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-options -O2 } */
+/* { dg-add-options arm_arch_v8a } */
+
+#include ../aarch64/sync-op-acquire.x
+
+/* { dg-final { scan-assembler-times ldrex 1 } } */
+/* { dg-final { scan-assembler-times stlex 1 } } */
+/* { dg-final { scan-assembler-times dmb 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
new file mode 100644
index 000..a5ad3bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-options -O2 } */
+/* { dg-add-options arm_arch_v8a } */
+
+#include ../aarch64/sync-op-full.x
+
+/* { dg-final { scan-assembler-times ldrex 12 } } */
+/* { dg-final { scan-assembler-times stlex 12 } } */
+/* { dg-final { scan-assembler-times dmb 12 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c b/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
new file mode 100644
index 000..0d3be7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { do-require-effective-target arm_arch_v8a_ok } */
+/* { dg-options -O2 } */
+/* { dg-add-options arm_arch_v8a } */
+
+#include ../aarch64/sync-op-release.x
+
+/* { dg-final { scan-assembler-times stl 1 } } */
-- 
1.9.1

[scalar-storage-order] Add -fsso-struct switch

2015-07-02 Thread Eric Botcazou

This adds a -fsso-struct switch to the C family of languages that makes it 
possible to change the default scalar storage order (a nice way of shooting 
oneself in the foot if you ask me, but the Ada compiler has its own version of 
this, so that's sort of fair).  It is modelled on #pragma pack/-fpack-struct.

Tested on x86_64-suse-linux, applied on the branch.


* doc/extend.texi (Structure-Layout Pragmas): Document default value of
scalar_storage_order and add cross-reference.
* doc/invoke.texi (C Dialect Options): Document -fsso-struct.
* flag-types.h (enum scalar_storage_order_kind): New enumeration.
c-family:
* c.opt (fsso-struct=): New option.
* c-pragma.c (enum scalar_storage_order_kind): Delete.
(global_sso_kind): Rename into...
(global_sso): ...this.  Do not initialize it here.
(maybe_apply_pragma_scalar_storage_order): Adjust to renaming.
Replace SSO_DEFAULT with SSO_NATIVE.
(handle_pragma_scalar_storage_order): Likewise with default_sso.
(init_pragma): Initialize global_sso to default_sso.
testsuite/
* c-c++-common/sso-6.c: New test.
* c-c++-common/sso-7.c: Likewise.

-- 
Eric Botcazou

Index: doc/extend.texi
===
--- doc/extend.texi	(revision 225237)
+++ doc/extend.texi	(working copy)
@@ -18414,8 +18414,9 @@ documented @code{__attribute__ ((scalar_
 of the scalar fields to big-endian.
 @item @code{#pragma scalar_storage_order little-endian} sets the storage order
 of the scalar fields to little-endian.
-@item @code{#pragma scalar_storage_order default} goes back to the default
-endianness.
+@item @code{#pragma scalar_storage_order default} goes back to the endianness
+that was in effect when compilation started (see also command-line option
+@option{-fsso-struct=@var{endianness}} @pxref{C Dialect Options}).
 @end enumerate
 
 @node Weak Pragmas
Index: doc/invoke.texi
===
--- doc/invoke.texi	(revision 224467)
+++ doc/invoke.texi	(working copy)
@@ -169,10 +169,11 @@ in the following sections.
 -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol
 -fno-asm  -fno-builtin  -fno-builtin-@var{function} @gol
 -fhosted  -ffreestanding -fopenacc -fopenmp -fopenmp-simd @gol
--fms-extensions -fplan9-extensions -trigraphs -traditional -traditional-cpp @gol
+-fms-extensions -fplan9-extensions -fsso-struct=@var{endianness}
 -fallow-single-precision  -fcond-mismatch -flax-vector-conversions @gol
 -fsigned-bitfields  -fsigned-char @gol
--funsigned-bitfields  -funsigned-char}
+-funsigned-bitfields  -funsigned-char @gol
+-trigraphs -traditional -traditional-cpp}
 
 @item C++ Language Options
 @xref{C++ Dialect Options,,Options Controlling C++ Dialect}.
@@ -2066,6 +2067,17 @@ These options control whether a bit-fiel
 declaration does not use either @code{signed} or @code{unsigned}.  By
 default, such a bit-field is signed, because this is consistent: the
 basic integer types such as @code{int} are signed types.
+
+@item -fsso-struct=@var{endianness}
+@opindex fsso-struct
+Set the default scalar storage order of structures, unions and classes to
+the specified endianness.  The accepted values are @samp{big-endian} and
+@samp{little-endian}.  If the option is not passed, the compiler uses the
+native endianness of the target.
+
+@strong{Warning:} the @option{-fsso-struct} switch causes GCC to generate
+code that is not binary compatible with code generated without it if the
+specified endianness is not the native endianness of the target.
 @end table
 
 @node C++ Dialect Options
Index: c-family/c.opt
===
--- c-family/c.opt	(revision 224463)
+++ c-family/c.opt	(working copy)
@@ -1402,6 +1402,19 @@ Enable C++14 sized deallocation support
 fsquangle
 C++ ObjC++ Ignore Warn(switch %qs is no longer supported)
 
+fsso-struct=
+C C++ Joined RejectNegative Enum(sso_struct) Var(default_sso) Init(SSO_NATIVE)
+-fsso-struct=[big-endian|little-endian] Set the default scalar storage order
+
+Enum
+Name(sso_struct) Type(enum scalar_storage_order_kind) UnknownError(unrecognized scalar storage order value %qs)
+
+EnumValue
+Enum(sso_struct) String(big-endian) Value(SSO_BIG_ENDIAN)
+
+EnumValue
+Enum(sso_struct) String(little-endian) Value(SSO_LITTLE_ENDIAN)
+
 fstats
 C++ ObjC++ Var(flag_detailed_statistics)
 Display statistics accumulated during compilation
Index: c-family/c-pragma.c
===
--- c-family/c-pragma.c	(revision 225237)
+++ c-family/c-pragma.c	(working copy)
@@ -400,19 +400,12 @@ handle_pragma_weak (cpp_reader * ARG_UNU
 }
 }
 
-enum scalar_storage_order_kind
-{
-  SSO_DEFAULT,
-  SSO_BIG_ENDIAN,
-  SSO_LITTLE_ENDIAN
-};
-
-static enum scalar_storage_order_kind global_sso_kind = SSO_DEFAULT;
+static enum scalar_storage_order_kind global_sso;

Re: [PATCH] Discard Scops for which entry==exit

2015-07-02 Thread Tobias Grosser


On 07/02/2015 05:37 PM, Aditya K wrote:

A test case (gcc/testsuite/gcc.dg/graphite/pr18792.c) came up when we removed 
`graphite-scop-detection.c:limit_scops'.
The test case is a scop where entry==exit,

BB5 (*#) - BB6 (#);
BB6 - BB5;

In this case BB2 is out of the scop. This is basically an empty (infinite) loop.


LGTM.

Tobias

RE: [PATCH] Discard Scops for which entry==exit

2015-07-02 Thread Aditya K

A test case (gcc/testsuite/gcc.dg/graphite/pr18792.c) came up when we removed 
`graphite-scop-detection.c:limit_scops'.
The test case is a scop where entry==exit,

BB5 (*#) - BB6 (#);
BB6 - BB5;

In this case BB2 is out of the scop. This is basically an empty (infinite) loop.



2015-06-29  Aditya Kumar  aditya...@samsung.com
    Sebastian Pop s@samsung.com

    * graphite-scop-detection.c (build_scops_1): Discard scops for which 
entry==exit


---
 gcc/graphite-scop-detection.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index e8ddecd..f57cc4a 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -810,7 +810,14 @@ build_scops_1 (basic_block current, loop_p outermost_loop,
 {
   open_scop.exit = sinfo.exit;
   gcc_assert (open_scop.exit);
-  scops-safe_push (open_scop);
+  if (open_scop.entry != open_scop.exit)
+    scops-safe_push (open_scop);
+  else
+    {
+      sinfo.difficult = true;
+      sinfo.exits = false;
+      sinfo.exit = NULL;
+    }
 }
 
   result.exit = sinfo.exit;
-- 
2.1.0.243.g30d45f7




 Date: Thu, 2 Jul 2015 09:53:25 +0200
 From: tob...@grosser.es
 To: hiradi...@msn.com; seb...@gmail.com
 CC: gcc-patches@gcc.gnu.org
 Subject: Re: [PATCH] Discard Scops for which entry==exit

 On 06/30/2015 05:47 PM, Aditya K wrote:
 Hi Tobias,
 A test case (gcc/testsuite/gcc.dg/graphite/pr18792.c) came up when we 
 removed `graphite-scop-detection.c:limit_scops'.
 The test case is a scop where entry==exit,

 BB5 (*#) - BB6 (#);
 BB6 - BB5;

 In this case BB2 is out of the scop. This is basically an empty (infinite) 
 loop with no entr

 OK, maybe mention this in the commit message.


 Best,
 Tobias

Re: [C++/preprocessor Patch] PR c++/53690

2015-07-02 Thread Paolo Carlini


On 07/01/2015 11:14 PM, Paolo Carlini wrote:
I stand corrected: in fact we are already using a mix of bool and int 
return types in those functions. Thus I'm also testing the below 
version, which simply changes the return type to bool with true 
meaning success.

Testing went Ok.

Paolo.

[PATCH 3/3] [ARM] PR63870 NEON error messages

2015-07-02 Thread Charles Baylis

gcc/testsuite/ChangeLog:

DATE  Charles Baylis  charles.bay...@linaro.org

* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_p8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_s8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2_lane_u8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_p8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_s8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_u8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_p8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_s8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3_lane_u8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_p8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_s8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_u8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_p8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_s8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u16_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u64_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4_lane_u8_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f32_indices_1.c: New 
test.
* gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f64_indices_1.c: New 
test.
*

[PATCH 0/3] [ARM] PR63870 improve error messages for NEON vldN_lane/vstN_lane

2015-07-02 Thread Charles Baylis

These patches are a port of the changes that do the same thing for AArch64 (see 
https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01984.html)

The first patch ports over some infrastructure, and the second converts the
vldN_lane and vstN_lane intrinsics. The changes required for vget_lane and
vset_lane will be done in a future patch.

The third patch includes the test cases from the AArch64 version, except that
the xfails for arm targets have been removed. If this series gets approved
before the AArch64 patch, I will commit the tests with xfail for aarch64
targets.

OK for trunk?


Charles Baylis (3):
  [ARM] PR63870 Add qualifiers for NEON builtins
  [ARM] PR63870 Mark lane indices of vldN/vstN with appropriate
qualifier
  [ARM] PR63870 Add test cases

 gcc/config/arm/arm-builtins.c  | 69 --
 gcc/config/arm/arm-protos.h|  4 ++
 gcc/config/arm/arm.c   | 20 +++
 gcc/config/arm/arm.h   |  3 +
 gcc/config/arm/neon.md | 49 +++
 .../advsimd-intrinsics/vld2_lane_f32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld2_lane_f64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld2_lane_p8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld2_lane_s16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld2_lane_s32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld2_lane_s64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld2_lane_s8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld2_lane_u16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld2_lane_u32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld2_lane_u64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld2_lane_u8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld2q_lane_f32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld2q_lane_f64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld2q_lane_p8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld2q_lane_s16_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld2q_lane_s32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld2q_lane_s64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld2q_lane_s8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld2q_lane_u16_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld2q_lane_u32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld2q_lane_u64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld2q_lane_u8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3_lane_f32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld3_lane_f64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3_lane_p8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld3_lane_s16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld3_lane_s32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld3_lane_s64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3_lane_s8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld3_lane_u16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld3_lane_u32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld3_lane_u64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3_lane_u8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld3q_lane_f32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld3q_lane_f64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld3q_lane_p8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3q_lane_s16_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld3q_lane_s32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld3q_lane_s64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld3q_lane_s8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld3q_lane_u16_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld3q_lane_u32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld3q_lane_u64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld3q_lane_u8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld4_lane_f32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld4_lane_f64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld4_lane_p8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld4_lane_s16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld4_lane_s32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld4_lane_s64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld4_lane_s8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld4_lane_u16_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld4_lane_u32_indices_1.c   | 15 +
 .../advsimd-intrinsics/vld4_lane_u64_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld4_lane_u8_indices_1.c| 15 +
 .../advsimd-intrinsics/vld4q_lane_f32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld4q_lane_f64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld4q_lane_p8_indices_1.c   | 16 +
 .../advsimd-intrinsics/vld4q_lane_s16_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld4q_lane_s32_indices_1.c  | 15 +
 .../advsimd-intrinsics/vld4q_lane_s64_indices_1.c  | 16 +
 .../advsimd-intrinsics/vld4q_lane_s8_indices_1.c   | 16

[PATCH 1/3] [ARM] PR63870 NEON error messages

2015-07-02 Thread Charles Baylis

gcc/ChangeLog:

DATE  Charles Baylis  charles.bay...@linaro.org

* config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerators
qualifier_lane_index, qualifier_struct_load_store_lane_index.
(arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON
argument qualifiers.
(arm_expand_neon_builtin): Handle NEON argument qualifiers.
* config/arm/arm-protos.h: (arm_neon_lane_bounds) New prototype.
* config/arm/arm.c (arm_neon_lane_bounds): New function.
* config/arm/arm.h (ENDIAN_LANE_N): New macro.

Change-Id: Iaa14d8736879fa53776319977eda2089f0a26647
---
 gcc/config/arm/arm-builtins.c | 65 ---
 gcc/config/arm/arm-protos.h   |  4 +++
 gcc/config/arm/arm.c  | 20 +
 gcc/config/arm/arm.h  |  3 ++
 4 files changed, 75 insertions(+), 17 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index f960e0a..8f1253e 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -77,7 +77,11 @@ enum arm_type_qualifiers
   /* qualifier_const_pointer | qualifier_map_mode  */
   qualifier_const_pointer_map_mode = 0x86,
   /* Polynomial types.  */
-  qualifier_poly = 0x100
+  qualifier_poly = 0x100,
+  /* Lane indices - must be in range, and flipped for bigendian.  */
+  qualifier_lane_index = 0x200,
+  /* Lane indices for single lane structure loads and stores.  */
+  qualifier_struct_load_store_lane_index = 0x400
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -1927,6 +1931,8 @@ arm_expand_unop_builtin (enum insn_code icode,
 typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
+  NEON_ARG_LANE_INDEX,
+  NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
   NEON_ARG_MEMORY,
   NEON_ARG_STOP
 } builtin_arg;
@@ -1984,9 +1990,9 @@ neon_dereference_pointer (tree exp, tree type, 
machine_mode mem_mode,
 /* Expand a Neon builtin.  */
 static rtx
 arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
- int icode, int have_retval, tree exp, ...)
+ int icode, int have_retval, tree exp,
+ builtin_arg *args)
 {
-  va_list ap;
   rtx pat;
   tree arg[SIMD_MAX_BUILTIN_ARGS];
   rtx op[SIMD_MAX_BUILTIN_ARGS];
@@ -2001,13 +2007,11 @@ arm_expand_neon_args (rtx target, machine_mode 
map_mode, int fcode,
  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
 target = gen_reg_rtx (tmode);
 
-  va_start (ap, exp);
-
   formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
 
   for (;;)
 {
-  builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
+  builtin_arg thisarg = args[argc];
 
   if (thisarg == NEON_ARG_STOP)
break;
@@ -2043,17 +2047,46 @@ arm_expand_neon_args (rtx target, machine_mode 
map_mode, int fcode,
op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
  break;
 
+case NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
+ gcc_assert (argc  1);
+ if (CONST_INT_P (op[argc]))
+   {
+ arm_neon_lane_bounds (op[argc], 0,
+   GET_MODE_NUNITS (map_mode), exp);
+ /* Keep to GCC-vector-extension lane indices in the RTL.  */
+ op[argc] =
+   GEN_INT (ENDIAN_LANE_N (map_mode, INTVAL (op[argc])));
+   }
+ goto constant_arg;
+
+case NEON_ARG_LANE_INDEX:
+ /* Must be a previous operand into which this is an index.  */
+ gcc_assert (argc  0);
+ if (CONST_INT_P (op[argc]))
+   {
+ machine_mode vmode = insn_data[icode].operand[argc - 1].mode;
+ arm_neon_lane_bounds (op[argc],
+   0, GET_MODE_NUNITS (vmode), exp);
+ /* Keep to GCC-vector-extension lane indices in the RTL.  */
+ op[argc] = GEN_INT (ENDIAN_LANE_N (vmode, INTVAL (op[argc])));
+   }
+ /* Fall through - if the lane index isn't a constant then
+the next case will error.  */
case NEON_ARG_CONSTANT:
+constant_arg:
  if (!(*insn_data[icode].operand[opno].predicate)
  (op[argc], mode[argc]))
-   error_at (EXPR_LOCATION (exp), incompatible type for argument 
%d, 
-  expected %const int%, argc + 1);
+   {
+ error (%Kargument %d must be a constant immediate,
+exp, argc + 1);
+ return const0_rtx;
+   }
  break;
+
 case NEON_ARG_MEMORY:
  /* Check if expand failed.  */
  if (op[argc] == const0_rtx)
  {
-   va_end (ap);
return 0;
  }
  gcc_assert (MEM_P (op[argc]));
@@ -2076,8 +2109,6 @@

[PATCH 2/3] [ARM] PR63870 NEON error messages

2015-07-02 Thread Charles Baylis

gcc/ChangeLog:

DATE  Charles Baylis  charles.bay...@linaro.org

* config/arm/arm-builtins.c: (arm_load1_qualifiers) Use
qualifier_struct_load_store_lane_index.
(arm_storestruct_lane_qualifiers) Likewise.
* config/arm/neon.md: (neon_vld1_lanemode) Reverse lane numbers for
big-endian.
(neon_vst1_lanemode) Likewise.
(neon_vld2_lanemode) Likewise.
(neon_vst2_lanemode) Likewise.
(neon_vld3_lanemode) Likewise.
(neon_vst3_lanemode) Likewise.
(neon_vld4_lanemode) Likewise.
(neon_vst4_lanemode) Likewise.

Change-Id: Ic39898d288701bc5b712490265be688f5620c4e2
---
 gcc/config/arm/arm-builtins.c |  4 ++--
 gcc/config/arm/neon.md| 49 +++
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 8f1253e..b7b7b12 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -145,7 +145,7 @@ arm_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 static enum arm_type_qualifiers
 arm_load1_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_const_pointer_map_mode,
-  qualifier_none, qualifier_immediate };
+  qualifier_none, qualifier_struct_load_store_lane_index };
 #define LOAD1LANE_QUALIFIERS (arm_load1_lane_qualifiers)
 
 /* The first argument (return type) of a store should be void type,
@@ -164,7 +164,7 @@ arm_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 static enum arm_type_qualifiers
 arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_void, qualifier_pointer_map_mode,
-  qualifier_none, qualifier_immediate };
+  qualifier_none, qualifier_struct_load_store_lane_index };
 #define STORE1LANE_QUALIFIERS (arm_storestruct_lane_qualifiers)
 
 #define v8qi_UP  V8QImode
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 654d9d5..dbd5852 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -4277,8 +4277,9 @@
 UNSPEC_VLD1_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[3]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[3]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
+  operands[3] = GEN_INT (lane);
   if (lane  0 || lane = max)
 error (lane out of range);
   if (max == 1)
@@ -4297,8 +4298,9 @@
 UNSPEC_VLD1_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[3]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[3]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
+  operands[3] = GEN_INT (lane);
   int regno = REGNO (operands[0]);
   if (lane  0 || lane = max)
 error (lane out of range);
@@ -4383,8 +4385,9 @@
  UNSPEC_VST1_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[2]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[2]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
+  operands[2] = GEN_INT (lane);
   if (lane  0 || lane = max)
 error (lane out of range);
   if (max == 1)
@@ -4403,7 +4406,7 @@
  UNSPEC_VST1_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[2]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[2]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
   int regno = REGNO (operands[1]);
   if (lane  0 || lane = max)
@@ -4412,8 +4415,8 @@
 {
   lane -= max / 2;
   regno += 2;
-  operands[2] = GEN_INT (lane);
 }
+  operands[2] = GEN_INT (lane);
   operands[1] = gen_rtx_REG (V_HALFmode, regno);
   if (max == 2)
 return vst1.V_sz_elem\t{%P1}, %A0;
@@ -4473,7 +4476,7 @@
UNSPEC_VLD2_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[3]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[3]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
   int regno = REGNO (operands[0]);
   rtx ops[4];
@@ -4482,7 +4485,7 @@
   ops[0] = gen_rtx_REG (DImode, regno);
   ops[1] = gen_rtx_REG (DImode, regno + 2);
   ops[2] = operands[1];
-  ops[3] = operands[3];
+  ops[3] = GEN_INT (lane);
   output_asm_insn (vld2.V_sz_elem\t{%P0[%c3], %P1[%c3]}, %A2, ops);
   return ;
 }
@@ -4498,7 +4501,7 @@
UNSPEC_VLD2_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[3]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[3]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
   int regno = REGNO (operands[0]);
   rtx ops[4];
@@ -4588,7 +4591,7 @@
  UNSPEC_VST2_LANE))]
   TARGET_NEON
 {
-  HOST_WIDE_INT lane = INTVAL (operands[2]);
+  HOST_WIDE_INT lane = ENDIAN_LANE_N(MODEmode, INTVAL (operands[2]));
   HOST_WIDE_INT max = GET_MODE_NUNITS (MODEmode);
   int regno = REGNO (operands[1]);
   rtx ops[4];
@@ -4597,7 +4600,7 @@
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (DImode, regno);
   ops[2] = gen_rtx_REG (DImode, regno + 2);
-  ops[3] = operands[2];
+  ops[3] = GEN_INT

Re: [Patch, fortran] PR52846 - [F2008] Support submodules

2015-07-02 Thread Paul Richard Thomas

Dear All,

Committed as revision 225354.

Compared with the submitted version, I have added another test -
submodule_7.f90. This is a slightly tweaked version of the example in
the F2008 standard. In order to get it to compile, the error produced
by the main program's interface block was suppressed by excluding
module procedures from the error in interface.c. Otherwise, the
compiler complains that module procedures are not module procedures.

Thanks to Damian, Reinhold, Salvatore and FX for help, comments and advice.

I'll get on and sort out the business with private symbols now.

Cheers

Paul


On 30 June 2015 at 15:51, FX fxcoud...@gmail.com wrote:
 Hi Paul,

 I don’t feel confident enough in many parts of the code (including the module 
 part) to formally review it, but from what I’ve read it seemed rather logical 
 and well-commented. If it regtests fine, I think your plan (especially at the 
 current GCC stage) of committing this week is sound.

 One question I had is: does this change the .mod file format in any case? I 
 don’t think, cause you don’t seem to bump the version number, but have you 
 checked on specific cases (like, the mega cp2k example) that the patch indeed 
 does not change existing module files (the ones that do not use submodules)?

 Cheers, and thanks for this patch!

 FX




-- 
Outside of a dog, a book is a man's best friend. Inside of a dog it's
too dark to read.

Groucho Marx

Re: [Fortran f951, C++14] Fix trans-common.c compilation failure on AIX

2015-07-02 Thread David Edelsohn

On Thu, Jul 2, 2015 at 4:32 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Thu, Jul 02, 2015 at 04:20:16PM -0400, David Edelsohn wrote:
 After the change to C++14, Fortran trans-common.c fails to compile on
 AIX due to function declaration conflicts in unistd.h.

 Tobias previously added use of std::map to trans-common.c and included
 map first in the source file, before any GCC configuration headers.
 map inherently includes various system header files before GCC
 system.h and config.h have defined macros affecting system header
 files, causing later conflicts when other system header files are
 included.

 This patch switches the order of inclusion for trans-common.c to
 include map last, after system.h, config.h and other GCC headers, as
 it is included in other GCC source files.

 Generally, system headers should be included either from system.h, or
 in between config.h and system.h, or right after config.h and system.h.
 Including them after myriads of GCC headers risks conflicts with all the
 GCC macros.

graphite-isl-ast-to-gimple.c includes map last.

auto-profile.c includes it after system.h.

I can change the patch to include it after system.h, if that is
preferred.  That order also works on AIX.

Thanks, David

Re: [Fortran f951, C++14] Fix trans-common.c compilation failure on AIX

2015-07-02 Thread Jakub Jelinek

On Thu, Jul 02, 2015 at 04:47:13PM -0400, David Edelsohn wrote:
 I can change the patch to include it after system.h, if that is
 preferred.  That order also works on AIX.

If including it right after system.h works, it is preapproved.

Jakub

Re: [doc] invoke.texi: -mno-fancy-math-387 and FreeBSD

2015-07-02 Thread Gerald Pfeifer

On Thu, 2 Jul 2015, Andreas Tobler wrote:
 Fine with me as far as I can decide/approve.

Below the patch as I applied it.  (I thought I had regenerated
it before, but apparently not.)

 At least for trunk. For 5.2, I guess you must hurry since Richi 
 wants to spin a candidate tomorrow.

Good hint, thanks.  Let me kick off another test; the devil never
sleeps, as they say.

 On 4.9, I can't decide since I have no idea about the release 
 planning. Have to inform myself.

I tend to refer to https://gcc.gnu.org for release status.  And
usually ...and docs only applies. :-)

Gerald


2015-07-03  Gerald Pfeifer  ger...@pfeifer.com

PR target/37072
* doc/invoke.texi (i386 and x86-64 Options): -mno-fancy-math-387
is not actually the default on FreeBSD.

 2015-07-02  Bill Schmidt  wschm...@linux.vnet.ibm.com
 
* config/rs6000/rs6000-builtin.def (CMPGE_16QI): New built-in
Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 225361)
+++ doc/invoke.texi (working copy)
@@ -22570,7 +22570,7 @@
 @opindex mno-fancy-math-387
 Some 387 emulators do not support the @code{sin}, @code{cos} and
 @code{sqrt} instructions for the 387.  Specify this option to avoid
-generating those instructions.  This option is the default on FreeBSD,
+generating those instructions.  This option is the default on
 OpenBSD and NetBSD@.  This option is overridden when @option{-march}
 indicates that the target CPU always has an FPU and so the
 instruction does not need emulation.  These

[PATCH] PR target/66746: Failure to compile #include x86intrin.h with -miamcu

2015-07-02 Thread H.J. Lu

x86intrin.h has useful intrinsics for instructions for IA MCU.  This
patch adds __iamcu__ check to x86intrin.h and ia32intrin.h.

OK for trunk?

H.J.
---
gcc/

PR target/66746
* config/i386/ia32intrin.h (__crc32b): Don't define if __iamcu__
is defined.
(__crc32w): Likewise.
(__crc32d): Likewise.
(__rdpmc): Likewise.
(__rdtscp): Likewise.
(_rdpmc): Likewise.
(_rdtscp): Likewise.
* config/i386/x86intrin.h: Only include ia32intrin.h if __iamcu__
is defined.

gcc/testsuite/

PR target/66746
* gcc.target/i386/pr66746.c: New file.
---
 gcc/config/i386/ia32intrin.h| 16 +++-
 gcc/config/i386/x86intrin.h |  5 +
 gcc/testsuite/gcc.target/i386/pr66746.c | 10 ++
 3 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66746.c

diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 1f728c8..b8d1c31 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -49,6 +49,8 @@ __bswapd (int __X)
   return __builtin_bswap32 (__X);
 }
 
+#ifndef __iamcu__
+
 #ifndef __SSE4_2__
 #pragma GCC push_options
 #pragma GCC target(sse4.2)
@@ -82,6 +84,8 @@ __crc32d (unsigned int __C, unsigned int __V)
 #pragma GCC pop_options
 #endif /* __DISABLE_SSE4_2__ */
 
+#endif /* __iamcu__ */
+
 /* 32bit popcnt */
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -90,6 +94,8 @@ __popcntd (unsigned int __X)
   return __builtin_popcount (__X);
 }
 
+#ifndef __iamcu__
+
 /* rdpmc */
 extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -98,6 +104,8 @@ __rdpmc (int __S)
   return __builtin_ia32_rdpmc (__S);
 }
 
+#endif /* __iamcu__ */
+
 /* rdtsc */
 extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -106,6 +114,8 @@ __rdtsc (void)
   return __builtin_ia32_rdtsc ();
 }
 
+#ifndef __iamcu__
+
 /* rdtscp */
 extern __inline unsigned long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -114,6 +124,8 @@ __rdtscp (unsigned int *__A)
   return __builtin_ia32_rdtscp (__A);
 }
 
+#endif /* __iamcu__ */
+
 /* 8bit rol */
 extern __inline unsigned char
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -290,9 +302,11 @@ __writeeflags (unsigned int X)
 #define _bit_scan_reverse(a)   __bsrd(a)
 #define _bswap(a)  __bswapd(a)
 #define _popcnt32(a)   __popcntd(a)
+#ifndef __iamcu__
 #define _rdpmc(a)  __rdpmc(a)
-#define _rdtsc()   __rdtsc()
 #define _rdtscp(a) __rdtscp(a)
+#endif /* __iamcu__ */
+#define _rdtsc()   __rdtsc()
 #define _rotwl(a,b)__rolw((a), (b))
 #define _rotwr(a,b)__rorw((a), (b))
 #define _rotl(a,b) __rold((a), (b))
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 6f7b1f6..be0a1a1 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -26,6 +26,8 @@
 
 #include ia32intrin.h
 
+#ifndef __iamcu__
+
 #include mmintrin.h
 
 #include xmmintrin.h
@@ -86,4 +88,7 @@
 #include xsavecintrin.h
 
 #include mwaitxintrin.h
+
+#endif /* __iamcu__ */
+
 #endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/testsuite/gcc.target/i386/pr66746.c 
b/gcc/testsuite/gcc.target/i386/pr66746.c
new file mode 100644
index 000..3ef77bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66746.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options -O2 -miamcu } */
+
+/* Defining away extern and __inline results in all of them being
+   compiled as proper functions.  */
+
+#define extern
+#define __inline
+
+#include x86intrin.h
-- 
2.4.3

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Jim Wilson

On 07/01/2015 11:17 PM, Jim Wilson wrote:
 On Wed, Jul 1, 2015 at 10:21 PM, Jason Merrill ja...@redhat.com wrote:
 This document also says that A workaround until libraries get updated is to
 include cstddef or stddef.h before any headers from that library.
 Can you try modifying the graphite* files accordingly?
 
 Right.  I forgot to try that.  Trying it now, I see that my build gets
 past the point that it failed, so this does appear to work.  I won't
 be able to finish a proper test until tomorrow, but for now this patch
 seems to work.

Since the patch to include system.h before the isl header did not work,
I went ahead and tested this patch to add stddef.h includes before the
isl headers.  I tested it with an x86_64 bootstrap and make check.
There were no problems caused by my patch.

Though as a side effect of doing this, I discovered another minor
problem with the C++ version change.  This caused one additional
testsuite failure.  It also caused a bunch of tests to start working,
which is nice, but the new failure needs to be addressed.

/home/wilson/FOSS/GCC/gcc-svn/gcc/testsuite/gcc.dg/plugin/wide-int_plugin.
c: In function 'void test_double_int_round_udiv()':
/home/wilson/FOSS/GCC/gcc-svn/gcc/testsuite/gcc.dg/plugin/wide-int_plugin.
c:13:45: error: narrowing conversion of '-1' from '
int' to 'long unsigned int' inside { } [-Wnarrowing]
   double_int dmax = { -1, HOST_WIDE_INT_MAX };
 ^
/home/wilson/FOSS/GCC/gcc-svn/gcc/testsuite/gcc.dg/plugin/wide-int_plugin.
c:14:33: error: narrowing conversion of '-1' from '
int' to 'long unsigned int' inside { } [-Wnarrowing]
   double_int dnegone = { -1, -1 };
 ^
...
FAIL: gcc.dg/plugin/wide-int_plugin.c compilation

The code compiles with -std=c++98.  It does not compile with -std=c++14.
 So this testcase should be fixed to work with c++14.  Or the c++14
support should be fixed if it is broken.

Jim

Re: [PATCH] config/bfin/bfin.c (hwloop_optimize): Use return false instead of gcc_assert for checking jump_insn.

2015-07-02 Thread Chen Gang

On 07/01/2015 11:27 PM, Chen Gang wrote:
 On 7/1/15 21:52, Bernd Schmidt wrote:
 On 07/01/2015 03:04 AM, Chen Gang wrote:

 For me, the more details are:

   - The insns have 2 loops which can be lsetup optimized.

   - After hwloop_optimize finishes 1st lsetup optimization, it generates
 new lsetup insn which appends to jump insn in the basic block (which
 causes the insns are not 'standard' but OK for code generation).

 The problem is that you can't append anything to a basic block after a jump. 
 You need to create a new one. This problem doesn't usually show up since 
 nothing ever looks at the basic block again, unless both directions from the 
 conditional branch happen to branch to lsetup candidate loops.

 
 OK, thanks. What you said sound reasonable to me.
  
 Below is a patch. Can you test this with anything you have beyond the 
 testsuite?

 
 It fixes this issue (Bug66620), keeps the insns standard, and can build
 the bfin kernel with allmodconfig successfully (although the bfin kernel
 members insist that allmodconfig is not a good idea for the bfin kernel).
 
 It finished the lsetup optimization for one loop, but still left the other (I
 get the same .s as with my original fix). On the 2nd pass in hwloop_optimize, it
 returns false. Any additional ideas for it are welcome.
 

I shall continue to analyse why the 2nd lsetup optimization has not happened.
I hope I can finish within the next week (by 2015-07-12).


 For me, my original fix is incorrect: it still leaves the insns in an
 incorrect state (although my fix can generate the correct .s, and can
 build the bfin kernel with allmodconfig successfully).
 
 
 Thanks.
 

-- 
Chen Gang

Open, share, and attitude like air, water, and life which God blessed

[PATCH] New configure option to default enable Smart Stack Protection

2015-07-02 Thread Magnus Granberg

Hi
I am working on a patch that enables Smart Stack Protection by default.
The patch still lacks docs and a testcase. I need your ideas and help.

/Magnus G.
---

--- a/gcc/configure.ac	2014-12-05 00:53:24.0 +0100
+++ b/gcc/configure.ac	2015-06-08 23:27:11.744348211 +0200
@@ -5221,6 +5119,25 @@ if test x$gcc_cv_libc_provides_ssp = xye
 	[Define if your target C library provides stack protector support])
 fi
 
+# Check whether --enable-default-ssp was given.
+AC_ARG_ENABLE(default-ssp,
+[AS_HELP_STRING([--enable-default-ssp],
+  [enable Smart Stack Protection as default])],
+if test x$gcc_cv_libc_provides_ssp = xyes; then
+  case $target in
+ia64*-*-*) enable_default_ssp=no ;;
+*) enable_default_ssp=$enableval ;;
+  esac
+else
+  enable_default_ssp=no
+fi,
+enable_default_ssp=no)
+if test x$enable_default_ssp == xyes ; then
+  AC_DEFINE(ENABLE_DEFAULT_SSP, 1,
+  [Define if your target supports default STACK-PROTECTOR and it is enabled.])
+fi
+AC_SUBST([enable_default_ssp])
+
 # Test for sys/sdt.h on the target.
 GCC_TARGET_TEMPLATE([HAVE_SYS_SDT_H])
 AC_MSG_CHECKING(sys/sdt.h in the target C library)
--- a/gcc/defaults.h	2014-11-01 09:13:09.0 +0100
+++ b/gcc/defaults.h	2015-06-08 22:43:18.764269749 +0200
@@ -1263,6 +1263,18 @@ see the files COPYING3 and COPYING.RUNTI
 #define STACK_SIZE_MODE word_mode
 #endif
 
+/* Default value for flag_stack_protect when flag_stack_protect is initialized to -1:
+   --enable-default-ssp: Default flag_stack_protect to -fstack-protector-strong.
+   --disable-default-ssp: Default flag_stack_protect to 0.
+ */
+#ifdef ENABLE_DEFAULT_SSP
+# ifndef DEFAULT_FLAG_SSP
+#  define DEFAULT_FLAG_SSP 3
+# endif
+#else
+# define DEFAULT_FLAG_SSP 0
+#endif
+
 /* Provide default values for the macros controlling stack checking.  */
 
 /* The default is neither full builtin stack checking...  */
--- a/gcc/common.opt	2014-10-28 11:33:04.0 +0100
+++ b/gcc/common.opt	2015-06-08 22:41:30.114266512 +0200
@@ -2054,15 +2054,15 @@ Common RejectNegative Joined Var(common_
 -fstack-limit-symbol=name	Trap if the stack goes past symbol name
 
 fstack-protector
-Common Report Var(flag_stack_protect, 1)
+Common Report Var(flag_stack_protect, 1) Init(-1)
 Use propolice as a stack protection method
 
 fstack-protector-all
-Common Report RejectNegative Var(flag_stack_protect, 2)
+Common Report RejectNegative Var(flag_stack_protect, 2) Init(-1)
 Use a stack protection method for every function
 
 fstack-protector-strong
-Common Report RejectNegative Var(flag_stack_protect, 3)
+Common Report RejectNegative Var(flag_stack_protect, 3) Init(-1)
 Use a smart stack protection method for certain functions
 
 fstack-protector-explicit
-Common Report RejectNegative Var(flag_stack_protect, 4)
+Common Report RejectNegative Var(flag_stack_protect, 4) Init(-1)
 Use stack protection method only for functions with the stack_protect attribute
 
 fstack-usage
--- a/gcc/opts.c	2014-10-28 11:33:04.0 +0100
+++ b/gcc/opts.c	2015-06-27 01:06:48.670870534 +0200
@@ -739,6 +740,1 @@ finish_options (struct gcc_options *opts
	   opts-x_flag_opts_finished = true;
	 }
 
+  /* We initialize opts-x_flag_stack_protect to -1 so that targets
+  can set a default value.  */
+  if (opts-x_flag_stack_protect == -1)
+opts-x_flag_stack_protect = DEFAULT_FLAG_SSP;
+
   if (opts-x_optimize == 0)
 {
   /* Inlining does not work if not optimizing,

Re: Fixed Regressions with [committed] Use target-insns.def for prologue epilogue insns

2015-07-02 Thread Hans-Peter Nilsson

 From: Richard Sandiford rdsandif...@googlemail.com
 Date: Thu, 2 Jul 2015 20:58:15 +0200

 Hans-Peter Nilsson hans-peter.nils...@axis.com writes:
  gcc:
  * config/cris/cris.md (epilogue): Remove condition.
  (prologue): Ditto.

 Thanks.

No, thank *you* for the massive #ifdef HAVE_x -> if (HAVE_x)
cleanup!  Some kind of fallout is practically inevitable.  I
appreciate the initial look even if it's just to punt to the
target maintainer.

brgds, H-P

Re: Do not take address of empty string front

2015-07-02 Thread Jonathan Wakely


On 22/06/15 16:10 +0100, Jonathan Wakely wrote:

On 20/06/15 12:59 +0100, Jonathan Wakely wrote:

On 20/06/15 12:03 +0200, François Dumont wrote:

Hi

 2 experimental tests are failing in debug mode because
__do_str_codecvt is sometimes taking address of string front() and
back() even if empty. It wasn't used so not a big issue but it still
seems better to avoid. I propose to rather use string begin() to get
buffer address.


But dereferencing begin() is still undefined for an empty string.
Shouldn't that fail for debug mode too? Why change one form of
undefined behaviour that we diagnose to another form that we don't
diagnose?

It would be better if that function didn't do any work when the input
range is empty:

--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -58,6 +58,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _OutStr __outstr, const _Codecvt __cvt, _State __state,
  size_t __count, _Fn __fn)
  {
+  if (__first == __last)
+   {
+ __outstr.clear();
+ return true;
+   }
+
size_t __outchars = 0;
auto __next = __first;
const auto __maxlen = __cvt.max_length() + 1;


This makes that change, and also moves wstring_convert into the
ABI-tagged __cxx11 namespace, and fixes a copypaste error in the
exception thrown from wbuffer_convert.


This is the equivalent patch for the branch.

Tested powerpc64le-linux, committed to gcc-5-branch.
commit ab6011c5ffbd16f7f3f509f6e9fec6dc9f7daf36
Author: Jonathan Wakely jwak...@redhat.com
Date:   Wed Jul 1 15:41:33 2015 +0100

	* include/bits/locale_conv.h (wstring_convert): Use __cxx11 inline
	namespace in new ABI.
	(wstring_convert::_M_conv): Handle empty range.

diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h
index 9be2866..de49dd5 100644
--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -51,6 +51,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
 
+_GLIBCXX_BEGIN_NAMESPACE_CXX11
   /// String conversions
   templatetypename _Codecvt, typename _Elem = wchar_t,
 	   typename _Wide_alloc = allocator_Elem,
@@ -192,10 +193,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_M_conv(const _InChar* __first, const _InChar* __last,
 		const _OutStr* __err, _MemFn __memfn)
 	{
+	  auto __outstr = __err ? _OutStr(__err-get_allocator()) : _OutStr();
+
+	  if (__first == __last)
+	{
+	  _M_count = 0;
+	  return __outstr;
+	}
+
 	  if (!_M_with_cvtstate)
 	_M_state = state_type();
 
-	  auto __outstr = __err ? _OutStr(__err-get_allocator()) : _OutStr();
 	  size_t __outchars = 0;
 	  auto __next = __first;
 	  const auto __maxlen = _M_cvt-max_length() + 1;
@@ -239,6 +247,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   bool			_M_with_cvtstate = false;
   bool			_M_with_strings = false;
 };
+_GLIBCXX_END_NAMESPACE_CXX11
 
   /// Buffer conversions
   templatetypename _Codecvt, typename _Elem = wchar_t,
@@ -264,7 +273,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
   {
 	if (!_M_cvt)
-	  __throw_logic_error(wstring_convert);
+	  __throw_logic_error(wbuffer_convert);
 
 	_M_always_noconv = _M_cvt-always_noconv();

Re: [C++/preprocessor Patch] PR c++/53690

2015-07-02 Thread Jason Merrill


OK.

Jason

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Tobias Grosser


On 07/02/2015 06:52 PM, Aditya Kumar wrote:

gcc/ChangeLog:

2015-07-02  Aditya Kumar  aditya...@samsung.com
Sebastian Pop  s@samsung.com

 * graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps):
Point iterator to use_stmt.



Hi Aditya,

this patch does not explain what was wrong and why this change is 
correct. Could you possibly add such an explanation?


Best,
Tobias



Bug introduced by patch:
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787
---
  gcc/graphite-sese-to-poly.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 271c499..78f10e4 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop, 
gimple_stmt_iterator *gsi)
handle_scalar_deps_crossing_scop_limits (scop, def, stmt);

FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
-if (gimple_code (use_stmt) == GIMPLE_PHI
-(res = true))
+if (gphi *phi = dyn_cast gphi * (use_stmt))
{
-   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));
-
+   res = true;
+   gphi_iterator psi = gsi_for_phi (phi);
if (scalar_close_phi_node_p (gsi_stmt (psi)))
  rewrite_close_phi_out_of_ssa (scop, psi);
else

Re: [PATCH][RFC] Add FRE in pass_vectorize

2015-07-02 Thread Jeff Law


On 07/02/2015 05:40 AM, Alan Lawrence wrote:

Jeff Law wrote:

On 06/24/2015 01:59 AM, Richard Biener wrote:

And then there is the possibility of making passes generate less
needs to perform cleanups after them - like in the present case
with the redundant IVs make them more apparently redundant by
CSEing the initial value and step during vectorizer code generation.
I'm playing with the idea of adding a simple CSE machinery to
the gimple_build () interface (aka match-and-simplify).  It
eventually invokes (well, not currently, but that can be fixed)
maybe_push_res_to_seq which is a good place to maintain a
table of already generated expressions.  That of course only
works if you either always append to the same sequence or at least
insert at the same place.

As you know we've gone back and forth on this in the past.  It's
always a trade-off.  I still ponder from time to time putting the
simple CSE and cprop bits back into the SSA rewriting phase to avoid
generating all kinds of garbage that just needs to be cleaned up later
-- particularly for incremental SSA updates.


Coming to this rather late, and without the background knowledge about
having gone back and forth, sorry! But what are the arguments against
this? Am I right in thinking that the SSA Rewriting phase would not
trigger as often as gimple_build(), or are these the same thing?
It's the into-ssa and incremental update phases.  The basic idea is it 
is very inexpensive to do const/copy propagation and simple CSE at that 
point.


When processing an assignment, after rewriting the inputs from _DECL 
nodes to SSA_NAMEs, you lookup the RHS in your hash table.  If you get a 
hit, you replace the expression with the SSA_NAME from the hash table 
and record that the destination has an equivalence.


Diego took this out several years ago with the idea that the into-ssa  
updates should be kept separate from optimizations.  With the ongoing 
need for early cleanups to make IPA more effective, I think it's time to 
revisit that decision as we get a lot of the obvious redundancies out of 
the stream by just being smart during into-ssa.  Which in turn means we 
don't have to do as much in the early optimizations before IPA.




Presumably when you say simple CSE machinery you'd have to bail out
quickly from tricky cases like, say:

if (P)
   {
 use ...expr...
   }
...
if (Q)
   {
 now building a new ...expr... here
   }
Not sure the problem here.  The simple CSE/cprop occurs as we're going 
into SSA form -- because into-ssa is inherently a dominator walk and 
we're rewriting operands as we go, we can trivially determine that we've 
already seen a given expression earlier in the dominator tree and that 
the result of that expression hasn't changed (by the nature of SSA).


Jeff

[V850] Hookize GO_IF_LEGITIMATE_ADDRESS

2015-07-02 Thread Anatoliy Sokolov


Hi.

This patch removes obsolete GO_IF_LEGITIMATE_ADDRESS macros from
the V850 back end in the GCC and introduces equivalent
TARGET_LEGITIMATE_ADDRESS_P target hook.

Regression tested on v850-unknown-eabi.

2015-07-02  Anatoly Sokolov  ae...@post.ru

* config/v850/v850.h (REG_OK_FOR_BASE_P, REG_OK_FOR_INDEX_P,
REG_OK_FOR_BASE_P_STRICT, REG_OK_FOR_INDEX_P_STRICT, STRICT,
RTX_OK_FOR_BASE_P, GO_IF_LEGITIMATE_ADDRESS): Remove macros.
* config/v850/v850.c (TARGET_LEGITIMATE_ADDRESS_P): Define.
(v850_reg_ok_for_base_p, v850_rtx_ok_for_base_p,
v850_legitimate_address_p): New functions.


Index: gcc/config/v850/v850.c
===
--- gcc/config/v850/v850.c  (revision 225152)
+++ gcc/config/v850/v850.c  (working copy)
@@ -3089,6 +3089,63 @@
!CONST_OK_FOR_K (INTVAL (XEXP (XEXP (x, 0), 1);
 }

+/* Helper function for `v850_legitimate_address_p'.  */
+
+static bool
+v850_reg_ok_for_base_p (const_rtx reg, bool strict_p)
+{
+  if (strict_p)
+  {
+return REGNO_OK_FOR_BASE_P (REGNO (reg));
+  } else {
+return true;
+  }
+}
+
+/* Accept either REG or SUBREG where a register is valid.  */
+
+static bool
+v850_rtx_ok_for_base_p (const_rtx x, bool strict_p)
+{
+  return ((REG_P (x)  v850_reg_ok_for_base_p  (x, strict_p))
+ || (SUBREG_P (x)  REG_P (SUBREG_REG (x))
+  v850_reg_ok_for_base_p (SUBREG_REG (x), strict_p)));
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
+
+static bool
+v850_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
+{
+  if (v850_rtx_ok_for_base_p (x, strict_p))
+return true;
+  if (CONSTANT_ADDRESS_P (x)
+   (mode == QImode || INTVAL (x) % 2 == 0)
+   (GET_MODE_SIZE (mode) = 4 || INTVAL (x) % 4 == 0))
+return true;
+  if (GET_CODE (x) == LO_SUM
+   REG_P (XEXP (x, 0))
+   v850_reg_ok_for_base_p (XEXP (x, 0), strict_p)
+   CONSTANT_P (XEXP (x, 1))
+   (!CONST_INT_P (XEXP (x, 1))
+ || ((mode == QImode || INTVAL (XEXP (x, 1)) % 2 == 0)
+  constraint_satisfied_p (XEXP (x, 1), CONSTRAINT_K)))
+   GET_MODE_SIZE (mode) = GET_MODE_SIZE (word_mode))
+return true;
+  if (special_symbolref_operand (x, mode)
+   (GET_MODE_SIZE (mode) = GET_MODE_SIZE (word_mode)))
+return true;
+  if (GET_CODE (x) == PLUS
+   v850_rtx_ok_for_base_p (XEXP (x, 0), strict_p)
+   constraint_satisfied_p (XEXP (x,1), CONSTRAINT_K)
+   ((mode == QImode || INTVAL (XEXP (x, 1)) % 2 == 0)
+   CONST_OK_FOR_K (INTVAL (XEXP (x, 1))
+ + (GET_MODE_NUNITS (mode) * UNITS_PER_WORD
+return true;
+
+  return false;
+}
+
 static int
 v850_memory_move_cost (machine_mode mode,
   reg_class_t reg_class ATTRIBUTE_UNUSED,
@@ -3291,6 +3348,9 @@
 #undef  TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P v850_legitimate_constant_p

+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P v850_legitimate_address_p
+
 #undef  TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

Index: gcc/config/v850/v850.h
===
--- gcc/config/v850/v850.h  (revision 225152)
+++ gcc/config/v850/v850.h  (working copy)
@@ -592,89 +592,7 @@
 /* Maximum number of registers that can appear in a valid memory address.  */

 #define MAX_REGS_PER_ADDRESS 1
-
-/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
-   and check its validity for a certain class.
-   We have two alternate definitions for each of them.
-   The usual definition accepts all pseudo regs; the other rejects
-   them unless they have been allocated suitable hard regs.
-   The symbol REG_OK_STRICT causes the latter definition to be used.
-
-   Most source files want to accept pseudo regs in the hope that
-   they will get allocated to the class that the insn wants them to be in.
-   Source files for reload pass need to be strict.
-   After reload, it makes no difference, since pseudo regs have
-   been eliminated by then.  */
-
-#ifndef REG_OK_STRICT
-
-/* Nonzero if X is a hard reg that can be used as an index
-   or if it is a pseudo reg.  */
-#define REG_OK_FOR_INDEX_P(X) 0
-/* Nonzero if X is a hard reg that can be used as a base reg
-   or if it is a pseudo reg.  */
-#define REG_OK_FOR_BASE_P(X) 1
-#define REG_OK_FOR_INDEX_P_STRICT(X) 0
-#define REG_OK_FOR_BASE_P_STRICT(X) REGNO_OK_FOR_BASE_P (REGNO (X))
-#define STRICT 0
-
-#else
-
-/* Nonzero if X is a hard reg that can be used as an index.  */
-#define REG_OK_FOR_INDEX_P(X) 0
-/* Nonzero if X is a hard reg that can be used as a base reg.  */
-#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
-#define STRICT 1
-
-#endif
-
 
-/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
-   that is a valid memory address for an instruction.
-   The MODE argument is the machine mode for the MEM

[PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Aditya Kumar

gcc/ChangeLog:

2015-07-02  Aditya Kumar  aditya...@samsung.com
Sebastian Pop  s@samsung.com

* graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps):
Point iterator to use_stmt.


Bug introduced by patch:
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787
---
 gcc/graphite-sese-to-poly.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 271c499..78f10e4 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop, 
gimple_stmt_iterator *gsi)
   handle_scalar_deps_crossing_scop_limits (scop, def, stmt);
 
   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
-if (gimple_code (use_stmt) == GIMPLE_PHI
-(res = true))
+if (gphi *phi = dyn_cast gphi * (use_stmt))
   {
-   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));
-
+   res = true;
+   gphi_iterator psi = gsi_for_phi (phi);
if (scalar_close_phi_node_p (gsi_stmt (psi)))
  rewrite_close_phi_out_of_ssa (scop, psi);
else
-- 
2.1.0.243.g30d45f7

Re: C++ PATCH to change default dialect to C++14

2015-07-02 Thread Jim Wilson

On Thu, Jul 2, 2015 at 6:08 AM, Richard Biener
richard.guent...@gmail.com wrote:
 On Thu, Jul 2, 2015 at 3:05 PM, Richard Biener
 richard.guent...@gmail.com wrote:
 Yeah, I _think_ the fix is to move the isl includes below the system.h
 include.

I tried this, and got a conflict for pretty much every macro defined
by ctype.h, e.g.

/usr/include/c++/4.8/bits/locale_facets.h:240:53: error: macro
toupper passed 2 arguments, but takes just 1
   toupper(char_type *__lo, const char_type* __hi) const

system.h includes safe-ctype.h which does
#include ctype.h
...
#undef toupper
#define toupper(c) do_not_use_toupper_with_safe_ctype

isl/int.h includes iostream which includes other header files that
want to define ctype like functions and we get a conflict.  I see that
ctype.h doesn't define any macros when __cplusplus is defined, but
if we disable the macro redefines in safe-ctype.h, it isn't very safe
anymore.  This is probably why the isl headers were deliberately
included before system.h when the ISL support was first added.

Jim

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Tobias Grosser


On 07/02/2015 08:34 PM, Sebastian Pop wrote:

On Thu, Jul 2, 2015 at 1:17 PM, Tobias Grosser tob...@grosser.es wrote:

On 07/02/2015 06:52 PM, Aditya Kumar wrote:


gcc/ChangeLog:

2015-07-02  Aditya Kumar  aditya...@samsung.com
 Sebastian Pop  s@samsung.com

  * graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps):
 Point iterator to use_stmt.



Hi Aditya,

this patch does not explain what was wrong and why this change is correct.
Could you possibly add such an explanation.


One of the code refactorings introducing phi node iterators modified
the semantics of this code as described below ...



Best,
Tobias




Bug introduced by patch:
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787


... here.
If you git log grep for this commit, you would see that this patch
reverts this typo introduced in a very large patch.


Sure. The corresponding change was:

-   gimple_stmt_iterator psi = gsi_for_stmt (use_stmt);
+   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));

Now this commit is not a pure revert. Instead of falling back to 
gsi_for_stmt, we now use gsi_for_phi(phi) and also somehow modify the 
condition above. I assume this is still correct, but as I am a little 
out of graphite, it would really help to explain (in two sentences in 
the commit message) why the previous change was wrong and how the 
behavior is now different. Something like:


After this patch we start to iterate at the very first phi node,
whereas before this applied we skipped the PHI nodes and started 
iterating at the first actual instruction.


Thanks,
Tobias

Re: Fixed Regressions with [committed] Use target-insns.def for prologue epilogue insns

2015-07-02 Thread Richard Sandiford

Hans-Peter Nilsson hans-peter.nils...@axis.com writes:
 From: Richard Sandiford rdsandif...@googlemail.com
 Date: Wed, 1 Jul 2015 23:26:59 +0200

 Hans-Peter Nilsson hans-peter.nils...@axis.com writes:
  From: Richard Sandiford richard.sandif...@arm.com
  Date: Tue, 30 Jun 2015 22:55:24 +0200

  Bootstrapped & regression-tested on x86_64-linux-gnu and 
  aarch64-linux-gnu.
  Also tested via config-list.mk.  Committed as preapproved.

  Thanks,
  Richard

  gcc/
  * defaults.h (HAVE_epilogue, gen_epilogue): Delete.
  * target-insns.def (epilogue, prologue, sibcall_prologue): New
  targetm instruction patterns.
  * alias.c (init_alias_analysis): Use them instead of HAVE_*/gen_*
  interface.
  * calls.c (expand_call): Likewise.
  * cfgrtl.c (cfg_layout_finalize): Likewise.
  * df-scan.c (df_get_entry_block_def_set): Likewise.
  (df_get_exit_block_use_set): Likewise.
  * dwarf2cfi.c (pass_dwarf2_frame::gate): Likewise.
  * final.c (final_start_function): Likewise.
  * function.c (thread_prologue_and_epilogue_insns): Likewise.
  (reposition_prologue_and_epilogue_notes): Likewise.
  * reorg.c (find_end_label): Likewise.
  * toplev.c (process_options): Likewise.

  I think this one -being the most fitting patch in the range
  (225190:225210]- caused this regression for cris-elf:

  Running
  /tmp/hpautotest-gcc1/gcc/gcc/testsuite/gcc.target/cris/torture/cris-torture.exp
  ...
  FAIL: gcc.target/cris/torture/no-pro-epi-1.c -O3 -g (internal
  compiler error)
  FAIL: gcc.target/cris/torture/no-pro-epi-1.c -O3 -g (test for excess
  errors)

  This test checks that the -mno-prologue-epilogue option works,
  whose semantics is supposedly self-explanatory.

 Well, yes and no :-)

 Hm...I take that as an affirmation on the regression but perhaps
 a no to some of my statements...

Just the semantics being self-explanatory.  It wasn't obvious to me
what we were supposed to do with CFI.  Whatever works for me though...

 which I guess means that the HAVE_prologue condition wasn't being
 consistently tested.  Now that it is: is -mno-prologue-epilogue
 just supposed to generate empty prologues and epilogues, as implied
 by the cris.c code?  If so then removing the conditions on prologue
 and epilogue should work.  If not, then which of the
 targetm.have_prologue ()
 etc. conditions do you need to be true for -mno-prologue-epilogue?

 (You have the distinction of having the only port with conditional
 prologue and epilogue patterns. :-))

 Not any longer.  Also removed a stale comment.
 This committed patch fixes the noted regressions, without
 causing further regressions, testing cris-elf in a simulator.

 gcc:
   * config/cris/cris.md (epilogue): Remove condition.
   (prologue): Ditto.

Thanks.

Richard

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Sebastian Pop

On Thu, Jul 2, 2015 at 1:44 PM, Tobias Grosser tob...@grosser.es wrote:
 If you git log grep for this commit, you would see that this patch
 reverts this typo introduced in a very large patch.


 Sure. The corresponding change was:

 -   gimple_stmt_iterator psi = gsi_for_stmt (use_stmt);
 +   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));

 Now this commit is not a pure revert. Instead of falling back to

IMO the patch restores the semantics, so it is a revert to some syntax changes:
in the past we had this:

 -   gimple_stmt_iterator psi = gsi_for_stmt (use_stmt);

that is get me an iterator pointing on the use_stmt.
After our patch we get the same semantics back (modulo some change in
function names, c++-ification, etc.)

gphi *phi = dyn_cast gphi * (use_stmt)
gphi_iterator psi = gsi_for_phi (phi);

that is again an iterator pointing on the use_stmt.

The patch at r217787 was incorrectly initializing the iterator
to point at the beginning of the phi nodes in the BB of the use_stmt:

 +   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));

This makes no sense, as then we would start processing a random phi node
and would fail to insert an array for a virtual phi node.

Sebastian

 gsi_for_stmt, we now use gsi_for_phi(phi) and also somehow modify the
 condition above. I assume this is still correct, but as I am a little out of
 graphite, it would really help to explain (in two sentences in the commit
 message) why the previous change was wrong and how the behavior is now
 different. Something like:

 After this patch we start to iterate at the very first phi node,
 whereas before this applied we skipped the PHI nodes and started iterating
 at the first actual instruction.

 Thanks,
 Tobias

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Tobias Grosser


On 07/02/2015 09:03 PM, Sebastian Pop wrote:

On Thu, Jul 2, 2015 at 1:44 PM, Tobias Grosser tob...@grosser.es wrote:

If you git log grep for this commit, you would see that this patch
reverts this typo introduced in a very large patch.



Sure. The corresponding change was:

-   gimple_stmt_iterator psi = gsi_for_stmt (use_stmt);
+   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));

Now this commit is not a pure revert. Instead of falling back to


IMO the patch restores the semantics, so it is a revert to some syntax changes:
in the past we had this:


-   gimple_stmt_iterator psi = gsi_for_stmt (use_stmt);


that is get me an iterator pointing on the use_stmt.
After our patch we get the same semantics back (modulo some change in
function names, c++-ification, etc.)

gphi *phi = dyn_cast gphi * (use_stmt)
gphi_iterator psi = gsi_for_phi (phi);

that is again an iterator pointing on the use_stmt.

The patch at r217787 was incorrectly initializing the iterator
to point at the beginning of the phi nodes in the BB of the use_stmt:


+   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));


This makes no sense, as then we would start processing a random phi node
and would fail to insert an array for a virtual phi node.


Thanks. I am a little slow today. The patch looks indeed correct. Maybe 
you could add this explanation to the commit message and also add a test 
case as Ramana suggested.


Tobias

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Ramana Radhakrishnan

On Thu, Jul 2, 2015 at 7:34 PM, Sebastian Pop seb...@gmail.com wrote:
 On Thu, Jul 2, 2015 at 1:17 PM, Tobias Grosser tob...@grosser.es wrote:
 On 07/02/2015 06:52 PM, Aditya Kumar wrote:

 gcc/ChangeLog:

 2015-07-02  Aditya Kumar  aditya...@samsung.com
 Sebastian Pop  s@samsung.com

  * graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps):
 Point iterator to use_stmt.


 Hi Aditya,

 this patch does not explain what was wrong and why this change is correct.
 Could you possibly add such an explanation.

 One of the code refactorings introducing phi node iterators modified
 the semantics of this code as described below ...


 Best,
 Tobias



 Bug introduced by patch:
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787

 ... here.
 If you git log grep for this commit, you would see that this patch
 reverts this typo introduced in a very large patch.


How about a testcase or 2 or mentioning if it is covered by existing
testcases ? And Aditya you may find it instructive to read this
https://gcc.gnu.org/contribute.html#patches

regards
Ramana


 Thanks,
 Sebastian

 ---
   gcc/graphite-sese-to-poly.c | 7 +++
   1 file changed, 3 insertions(+), 4 deletions(-)

 diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
 index 271c499..78f10e4 100644
 --- a/gcc/graphite-sese-to-poly.c
 +++ b/gcc/graphite-sese-to-poly.c
 @@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop,
 gimple_stmt_iterator *gsi)
 handle_scalar_deps_crossing_scop_limits (scop, def, stmt);

 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
 -if (gimple_code (use_stmt) == GIMPLE_PHI
 -(res = true))
 +if (gphi *phi = dyn_cast gphi * (use_stmt))
 {
 -   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));
 -
 +   res = true;
 +   gphi_iterator psi = gsi_for_phi (phi);
 if (scalar_close_phi_node_p (gsi_stmt (psi)))
   rewrite_close_phi_out_of_ssa (scop, psi);
 else

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Sebastian Pop

On Thu, Jul 2, 2015 at 2:03 PM, Ramana Radhakrishnan
ramana@googlemail.com wrote:
 How about a testcase or 2 or mentioning if it is covered by existing
 testcases ?

The patch fixes a test in testsuite/gcc.dg/graphite/ when removing the
use of limit_scops().
Maybe the commit message could contain the name of the test that it fixed.

The patch that removes limit_scops() is in my opinion trivial, and
will be submitted for review once we fixed all the errors it can cause
(code gen, scop translation to polyhedral, etc.)
We will also fix bootstrap with graphite enabled, and then we will fix
all problems in bootstrap with limit_scops() removed.
I will also add a buildbot tracking nightly bootstraps with -floop-*
and -fgraphite-identity.

 And Aditya you may find it instructive to read this
 https://gcc.gnu.org/contribute.html#patches


Agreed.

Thanks for the feedback.
Sebastian

 regards
 Ramana


 Thanks,
 Sebastian

 ---
   gcc/graphite-sese-to-poly.c | 7 +++
   1 file changed, 3 insertions(+), 4 deletions(-)

 diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
 index 271c499..78f10e4 100644
 --- a/gcc/graphite-sese-to-poly.c
 +++ b/gcc/graphite-sese-to-poly.c
 @@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop,
 gimple_stmt_iterator *gsi)
 handle_scalar_deps_crossing_scop_limits (scop, def, stmt);

 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
 -if (gimple_code (use_stmt) == GIMPLE_PHI
 -(res = true))
 +if (gphi *phi = dyn_cast gphi * (use_stmt))
 {
 -   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));
 -
 +   res = true;
 +   gphi_iterator psi = gsi_for_phi (phi);
 if (scalar_close_phi_node_p (gsi_stmt (psi)))
   rewrite_close_phi_out_of_ssa (scop, psi);
 else

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Sebastian Pop

On Thu, Jul 2, 2015 at 1:17 PM, Tobias Grosser tob...@grosser.es wrote:
 On 07/02/2015 06:52 PM, Aditya Kumar wrote:

 gcc/ChangeLog:

 2015-07-02  Aditya Kumar  aditya...@samsung.com
 Sebastian Pop  s@samsung.com

  * graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps):
 Point iterator to use_stmt.


 Hi Aditya,

 this patch does not explain what was wrong and why this change is correct.
 Could you possibly add such an explanation.

One of the code refactorings introducing phi node iterators modified
the semantics of this code as described below ...


 Best,
 Tobias



 Bug introduced by patch:
 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787

... here.
If you git log grep for this commit, you would see that this patch
reverts this typo introduced in a very large patch.

Thanks,
Sebastian

 ---
   gcc/graphite-sese-to-poly.c | 7 +++
   1 file changed, 3 insertions(+), 4 deletions(-)

 diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
 index 271c499..78f10e4 100644
 --- a/gcc/graphite-sese-to-poly.c
 +++ b/gcc/graphite-sese-to-poly.c
 @@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop,
 gimple_stmt_iterator *gsi)
 handle_scalar_deps_crossing_scop_limits (scop, def, stmt);

 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
 -if (gimple_code (use_stmt) == GIMPLE_PHI
 -(res = true))
 +if (gphi *phi = dyn_cast gphi * (use_stmt))
 {
 -   gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt));
 -
 +   res = true;
 +   gphi_iterator psi = gsi_for_phi (phi);
 if (scalar_close_phi_node_p (gsi_stmt (psi)))
   rewrite_close_phi_out_of_ssa (scop, psi);
 else

Re: [PATCH] Restore previous change for gimple_phi_iterator

2015-07-02 Thread Tobias Grosser


On 07/02/2015 09:09 PM, Sebastian Pop wrote:

On Thu, Jul 2, 2015 at 2:03 PM, Ramana Radhakrishnan
ramana@googlemail.com wrote:

How about a testcase or 2 or mentioning if it is covered by existing
testcases ?


The patch fixes a test in testsuite/gcc.dg/graphite/ when removing the
use of limit_scops().
Maybe the commit message could contain the name of the test that it fixed.


Right. I think just adding the missing information to the commit message 
will be enough.



The patch that removes limit_scops() is in my opinion trivial, and
will be submitted for review once we fixed all the errors it can cause
(code gen, scop translation to polyhedral, etc.)
We will also fix bootstrap with graphite enabled, and then we will fix
all problems in bootstrap with limit_scops() removed.
I will also add a buildbot tracking nightly bootstraps with -floop-*
and -fgraphite-identity.


Sounds great. Nice to see more graphite activity again.

Best,
Tobias

Re: [PATCH] fix PR46029: reimplement if conversion of loads and stores [2nd submitted version of patch]

2015-07-02 Thread Abe


On 7/2/15 4:49 AM, Alan Lawrence wrote:


Thanks, Abe.


You are welcome, sir!  :-)



As before, I'm still confused here. This still returns false, i.e. bails out of
if-conversion, if the statement could trap. Doesn't the scratchpad let us handle
that? Or do we just not care because it won't be vectorizable anyway???


This seems like an opportunity for more optimization in the future.  However, 
at this time we do not see what kind of code would benefit from this 
optimization.
If you have some time to spare and wish to spend some of it on this issue, then 
please find/write a test case that would exercise this path, i.e. a snippet of 
code
that is not optimized due to the above concern, even though it _could_ be 
if-converted -- and, preferably, the resulting conversion _is_ 
vectorizer-friendly.

Of course, even if a test case can be written to trigger this missed 
opportunity,
that in and of itself does not yet tell us how much opportunity we are missing 
in _real-world_ code.



Nit: as before [...]


Thanks for the reminder[s].



Nit: as before - thanks for fixing the example here


You are welcome.



Where can I find info on what the different flag values mean?

 (I had thought they were booleans [...]

Sorry; I don't know if that is documented anywhere yet.

In this case, (-1) simply means "defaulted": on if the vectorizer is on, and 
off if it is off.
(0) means "user specified no if-conversion" and (1) means "user specified [yes] 
if-conversion".

Regards,

Abe

Re: [RFC, PATCH] Split pool_allocator and create a new object_allocator

2015-07-02 Thread Richard Sandiford

Martin Liška mli...@suse.cz writes:
 @@ -136,18 +135,18 @@ private:
   int64_t align_i;
} u;
  
 -static inline allocation_objectU *
 +static inline allocation_object*

space before *

  get_instance (void *data_ptr)
  {
 -  return (allocation_objectU *)(((char *)(data_ptr))
 -   - offsetof (allocation_objectU,
 +  return (allocation_object *)(((char *)(data_ptr))
 +   - offsetof (allocation_object,
 u.data));

space between ) and (.

  }
  
 -static inline U *
 +static inline void*
  get_data (void *instance_ptr)
  {
 -  return (U*)(((allocation_objectU *) instance_ptr)-u.data);
 +  return (void*)(((allocation_object *) instance_ptr)-u.data);

same 2 comments here, although maybe dropping the cast would be better?

 @@ -387,11 +349,11 @@ pool_allocatorT::allocate ()
/* We now know that we can take the first elt off the virgin list and
put it on the returned list.  */
block = m_virgin_free_list;
 -  header = (allocation_pool_list*) allocation_objectT::get_data 
 (block);
 +  header = (allocation_pool_list*) allocation_object::get_data (block);

Space before *.  I'll not list out the others :-)

 @@ -408,36 +370,34 @@ pool_allocatorT::allocate ()
  
  #ifdef ENABLE_CHECKING
/* Set the ID for element.  */
 -  allocation_objectT::get_instance (header)-id = m_id;
 +  allocation_object::get_instance (header)-id = m_id;
  #endif
VALGRIND_DISCARD (VALGRIND_MAKE_MEM_UNDEFINED (header, size));
  
 -  /* Call default constructor.  */
 -  return (T *)(header);
 +  return (void *)(header);

Same comment about cast to void *.

 diff --git a/gcc/asan.c b/gcc/asan.c
 index e89817e..dabd6f1 100644
 --- a/gcc/asan.c
 +++ b/gcc/asan.c
 @@ -362,20 +362,20 @@ struct asan_mem_ref
/* Pool allocation new operator.  */
inline void *operator new (size_t)
{
 -return pool.allocate ();
 +return ::new (pool.allocate ()) asan_mem_ref ();
}
  
/* Delete operator utilizing pool allocation.  */
inline void operator delete (void *ptr)
{
 -pool.remove ((asan_mem_ref *) ptr);
 +pool.remove (ptr);
}
  
/* Memory allocation pool.  */
 -  static pool_allocatorasan_mem_ref pool;
 +  static pool_allocator pool;
  };

I'm probably going over old ground/wounds, sorry, but what's the benefit
of having this sort of pattern?  Why not simply have object_allocators
and make callers use pool.allocate () and pool.remove (x) (with pool.remove
calling the destructor) instead of new and delete?  It feels wrong to me
to tie the data type to a particular allocation object like this.
And using the pool allocator functions directly has the nice property
that you can tell when a delete/remove isn't necessary because the pool
itself is being cleared.

Thanks,
Richard

[Fortran f951, C++14] Fix trans-common.c compilation failure on AIX

2015-07-02 Thread David Edelsohn

After the change to C++14, Fortran trans-common.c fails to compile on
AIX due to function declaration conflicts in unistd.h.

Tobias previously added use of std::map to trans-common.c and included
<map> first in the source file, before any GCC configuration headers.
<map> inherently includes various system header files before GCC
system.h and config.h have defined macros affecting system header
files, causing later conflicts when other system header files are
included.

This patch switches the order of inclusion for trans-common.c to
include <map> last, after system.h, config.h and other GCC headers, as
it is included in other GCC source files.

Bootstrapped on powerpc-ibm-aix7.1.0.0

Okay?

Thanks, David

* trans-common.c: Include map last.

Index: trans-common.c
===
--- trans-common.c  (revision 225349)
+++ trans-common.c  (working copy)
@@ -92,7 +92,6 @@
is examined for still-unused equivalence conditions.  We create a
block for each merged equivalence list.  */

-#include map
 #include config.h
 #include system.h
 #include coretypes.h
@@ -109,6 +108,7 @@
 #include trans-types.h
 #include trans-const.h
 #include target-memory.h
+#include map


 /* Holds a single variable in an equivalence set.  */

Re: [doc] invoke.texi: -mno-fancy-math-387 and FreeBSD

2015-07-02 Thread Andreas Tobler


On 28.06.15 17:50, Gerald Pfeifer wrote:

Now that Andreas is on board, time to dust off some older issues
of mine. :-)

I verified that current GCC HEAD generates fsincos on FreeBSD/i386
with -ffast-math (and no options otherwise), and generates a call
to a sin() function otherwise.

So, okay to apply this patch?  And if so, okay to push back to
GCC 5 and 4.9 as well?


Aehm, sorry for the delay. It is hot here 

Fine with me as far as I can decide/approve.

At least for trunk. For 5.2, I guess you must hurry since Richi wants to 
spin a candidate tomorrow.


On 4.9, I can't decide since I have no idea about the release planning. 
Have to inform myself.



Thanks,
Andreas



On Wed, 16 Feb 2011, Gerald Pfeifer wrote:

The documentation indicates that -mno-fancy-math-387 is the default
on FreeBSD, yet I do not see any code actually implementing that, and
I verified that the following

   #include math.h

   double f(double d) {
 return __builtin_sin(d);
   }

did generate fsin with -ffast-math as the only option.

Richard, http://gcc.gnu.org/ml/gcc-patches/2002-03/msg02001.html was
the last time someone really made changes in this area, though the
reference to FreeBSD predates your patch where you added OpenBSD and
NetBSD for both of which I _do_ see code in config/i386 to that extent,
unlike FreeBSD.

Am I missing something obvious, or is the patch below okay?

Gerald


2011-02-16  Gerald Pfeifer  ger...@pfeifer.com

PR target/37072
* doc/invoke.texi (i386 and x86-64 Options): -mno-fancy-math-387
is not actually the default on FreeBSD.
Uppercase CPU.

Index: doc/invoke.texi
===
--- doc/invoke.texi (revision 170120)
+++ doc/invoke.texi (working copy)
@@ -12273,9 +12273,9 @@
  @opindex mno-fancy-math-387
  Some 387 emulators do not support the @code{sin}, @code{cos} and
  @code{sqrt} instructions for the 387.  Specify this option to avoid
-generating those instructions.  This option is the default on FreeBSD,
+generating those instructions.  This option is the default on
  OpenBSD and NetBSD@.  This option is overridden when @option{-march}
-indicates that the target cpu will always have an FPU and so the
+indicates that the target CPU will always have an FPU and so the
  instruction will not need emulation.  As of revision 2.6.1, these
  instructions are not generated unless you also use the
  @option{-funsafe-math-optimizations} switch.

Re: [Fortran f951, C++14] Fix trans-common.c compilation failure on AIX

2015-07-02 Thread Jakub Jelinek

On Thu, Jul 02, 2015 at 04:20:16PM -0400, David Edelsohn wrote:
 After the change to C++14, Fortran trans-common.c fails to compile on
 AIX due to function declaration conflicts in unistd.h.
 
 Tobias previously added use of std::map to trans-common.c and included
 map first in the source file, before any GCC configuration headers.
 map inherently includes various system header files before GCC
 system.h and config.h have defined macros affecting system header
 files, causing later conflicts when other system header files are
 included.
 
 This patch switches the order of inclusion for trans-common.c to
 include map last, after system.h, config.h and other GCC headers, as
 it is included in other GCC source files.

Generally, system headers should be included either from system.h, or
in between config.h and system.h, or right after config.h and system.h.
Including them after myriads of GCC headers risks conflicts with all the
GCC macros.

Jakub

Re: [patch] Change valid arguments for --with-default-libstdcxx-abi

2015-07-02 Thread Jonathan Wakely


On 01/07/15 14:38 +0100, Jonathan Wakely wrote:

I intend to commit this to trunk and the gcc-5-branch either today or
tomorrow.

I made a mistake in using c++98 and c++11 as the valid arguments
for the ABI configure option, because it confuses people into thinking
that it is affected by, or affects, the -std=c++11 option.  In fact
the ABI mode and -std standard mode are independent.

The inline namespace is called __cxx11, the ABI-tag is cxx11 and the
controlling macro is _GLIBCXX_USE_CXX11_ABI so I think referring to
the ABI as cxx11 might help distinguish it from the -std=c++11 mode.

This will require distros that are using the
--with-default-libstdcxx-abi option to change their build scripts,
because using c++98 (or similar) now gives an error:

checking for default std::string ABI to use... configure: error: Supported arguments for 
--with-default-libstdcxx-abi have changed, use cxx11 or cxx98
Makefile:11390: recipe for target 'configure-target-libstdc++-v3' failed


Richi didn't like the proposed new options, so instead I'm calling
them "new" and "gcc4-compatible".

I'm committing this to trunk and gcc-5-branch.
commit 17e0412726bd01ebde66570e5123b8e660a9098e
Author: Jonathan Wakely jwak...@redhat.com
Date:   Wed Jul 1 14:22:05 2015 +0100

	* acinclude.m4 (GLIBCXX_DEFAULT_ABI): Change valid arguments for
	--with-default-libstdcxx-abi
	* configure: Regenerate.
	* doc/xml/manual/configure.xml: Document valid arguments.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 8340572..11f48f9 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -3792,7 +3792,7 @@ AC_DEFUN([GLIBCXX_ENABLE_LIBSTDCXX_DUAL_ABI], [
   fi
   if test x$enable_libstdcxx_dual_abi != xyes; then
 AC_MSG_NOTICE([dual ABI is disabled])
-default_libstdcxx_abi=c++98
+default_libstdcxx_abi=gcc4-compatible
   fi
   GLIBCXX_CONDITIONAL(ENABLE_DUAL_ABI, test $enable_libstdcxx_dual_abi = yes)
 ])
@@ -3800,7 +3800,7 @@ AC_DEFUN([GLIBCXX_ENABLE_LIBSTDCXX_DUAL_ABI], [
 dnl
 dnl Check to see which ABI should be enabled by default.
 dnl
-dnl --with-default-libstdcxx-abi={c++98,c++11}
+dnl --with-default-libstdcxx-abi={gcc4-compatible,new}
 dnl
 dnl Defines:
 dnl  _GLIBCXX_USE_CXX11_ABI (always defined, either to 1 or 0)
@@ -3812,14 +3812,16 @@ AC_DEFUN([GLIBCXX_DEFAULT_ABI], [
 AS_HELP_STRING([--with-default-libstdcxx-abi],
[set the std::string ABI to use by default]),
 [case $withval in
-  c++98|gnu++98|c++03|gnu++03)  default_libstdcxx_abi=c++98 ;;
-  c++1?|gnu++1?)  default_libstdcxx_abi=c++11 ;;
-  *)  AC_MSG_ERROR([Invalid argument for --with-default-libstdcxx-abi]) ;;
- esac],
-[default_libstdcxx_abi=c++11])
+  gcc4-compatible)  default_libstdcxx_abi=gcc4-compatible ;;
+  new|cxx11)  default_libstdcxx_abi=new ;;
+  c++*|gnu++*) AC_MSG_ERROR([Supported arguments for --with-default-libstdcxx-abi have changed, use new or gcc4-compatible]) ;;
+  *) AC_MSG_ERROR([Invalid argument for --with-default-libstdcxx-abi]) ;;
+ esac
+ ],
+[default_libstdcxx_abi=new])
   AC_MSG_RESULT(${default_libstdcxx_abi})
   fi
-  if test $default_libstdcxx_abi = c++11; then
+  if test $default_libstdcxx_abi = new; then
 glibcxx_cxx11_abi=1
 glibcxx_cxx98_abi=0
   else
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 1b99c06..e9521d6 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -78180,7 +78180,7 @@ fi
   if test x$enable_libstdcxx_dual_abi != xyes; then
 { $as_echo $as_me:${as_lineno-$LINENO}: dual ABI is disabled 5
 $as_echo $as_me: dual ABI is disabled 6;}
-default_libstdcxx_abi=c++98
+default_libstdcxx_abi=gcc4-compatible
   fi
 
 
@@ -78192,18 +78192,20 @@ $as_echo_n checking for default std::string ABI to use...  6; }
 # Check whether --with-default-libstdcxx-abi was given.
 if test ${with_default_libstdcxx_abi+set} = set; then :
   withval=$with_default_libstdcxx_abi; case $withval in
-  c++98|gnu++98|c++03|gnu++03)  default_libstdcxx_abi=c++98 ;;
-  c++1?|gnu++1?)  default_libstdcxx_abi=c++11 ;;
-  *)  as_fn_error Invalid argument for --with-default-libstdcxx-abi $LINENO 5 ;;
+  gcc4-compatible)  default_libstdcxx_abi=gcc4-compatible ;;
+  new|cxx11)  default_libstdcxx_abi=new ;;
+  c++*|gnu++*) as_fn_error Supported arguments for --with-default-libstdcxx-abi have changed, use \new\ or \gcc4-compatible\ $LINENO 5 ;;
+  *) as_fn_error Invalid argument for --with-default-libstdcxx-abi $LINENO 5 ;;
  esac
+
 else
-  default_libstdcxx_abi=c++11
+  default_libstdcxx_abi=new
 fi
 
   { $as_echo $as_me:${as_lineno-$LINENO}: result: ${default_libstdcxx_abi} 5
 $as_echo ${default_libstdcxx_abi} 6; }
   fi
-  if test $default_libstdcxx_abi = c++11; then
+  if test $default_libstdcxx_abi = new; then
 glibcxx_cxx11_abi=1
 glibcxx_cxx98_abi=0
   else
diff --git a/libstdc++-v3/doc/xml/manual/configure.xml

Re: [RFC, PATCH] Split pool_allocator and create a new object_allocator

2015-07-02 Thread Trevor Saunders

On Thu, Jul 02, 2015 at 09:09:31PM +0100, Richard Sandiford wrote:
 Martin Liška mli...@suse.cz writes:
  diff --git a/gcc/asan.c b/gcc/asan.c
  index e89817e..dabd6f1 100644
  --- a/gcc/asan.c
  +++ b/gcc/asan.c
  @@ -362,20 +362,20 @@ struct asan_mem_ref
 /* Pool allocation new operator.  */
 inline void *operator new (size_t)
 {
  -return pool.allocate ();
  +return ::new (pool.allocate ()) asan_mem_ref ();
 }
   
 /* Delete operator utilizing pool allocation.  */
 inline void operator delete (void *ptr)
 {
  -pool.remove ((asan_mem_ref *) ptr);
  +pool.remove (ptr);
 }
   
 /* Memory allocation pool.  */
  -  static pool_allocatorasan_mem_ref pool;
  +  static pool_allocator pool;
   };
 
 I'm probably going over old ground/wounds, sorry, but what's the benefit
 of having this sort of pattern?  Why not simply have object_allocators
 and make callers use pool.allocate () and pool.remove (x) (with pool.remove
 calling the destructor) instead of new and delete?  It feels wrong to me
 to tie the data type to a particular allocation object like this.

Well, the big question is what does allocate() do about construction?  It
seems weird for it to not call the ctor, but I'm not sure we can do a
good job of forwarding args to allocate() with C++98.

However, it seems kind of weird that the operator new here is calling the
placement new on the object it allocates.

 And using the pool allocator functions directly has the nice property
 that you can tell when a delete/remove isn't necessary because the pool
 itself is being cleared.

Well, all these cases involve a pool with static storage lifetime, right?
So actually, if you don't delete things in these pools, they are
effectively leaked.

Trev

 
 Thanks,
 Richard

74 matches

Mail list logo