Re: [PATCH, rs6000] Use direct moves for __builtin_signbit for 128-bit floating-point

2016-07-01 Thread Bill Schmidt
Hi,

I've revised the patch to address the previous concerns.  rs6000_split_signbit 
has
been greatly simplified since SF and DF modes are not currently of interest.  
The
use of -static-libgcc did indeed turn out to be leftover from long-ago necessity
and is no longer needed.  I also updated the signbit-3.c testcase to make use of
the recently added __float128 builtins.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
Is this ok for trunk, and eventual 6.2 backport?

Thanks!

Bill


[gcc]

2016-07-01  Michael Meissner  
Bill Schmidt  

* config/rs6000/rs6000-protos.h (rs6000_split_signbit): New
prototype.
* config/rs6000/rs6000.c (rs6000_split_signbit): New function.
* config/rs6000/rs6000.md (UNSPEC_SIGNBIT): New constant.
(SIGNBIT): New mode iterator.
(Fsignbit): New mode attribute.
(signbit2): Change operand1 to match FLOAT128 instead of
IBM128; dispatch to gen_signbit{kf,tf}2_dm for __float128
when direct moves are available.
(signbit2_dm): New define_insn_and_split).
(signbit2_dm2): New define_insn.

[gcc/testsuite]

2016-07-01  Michael Meissner  
Bill Schmidt  

* gcc.target/powerpc/signbit-1.c: New test.
* gcc.target/powerpc/signbit-2.c: New test.
* gcc.target/powerpc/signbit-3.c: New test.


Index: gcc/config/rs6000/rs6000-protos.h
===
--- gcc/config/rs6000/rs6000-protos.h   (revision 237929)
+++ gcc/config/rs6000/rs6000-protos.h   (working copy)
@@ -135,6 +135,7 @@ extern bool rs6000_emit_set_const (rtx, rtx);
 extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
+extern void rs6000_split_signbit (rtx, rtx);
 extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
 extern void rs6000_expand_atomic_exchange (rtx op[]);
 extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 237929)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -23145,6 +23145,48 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code,
 emit_move_insn (dest, target);
 }
 
+/* Split a signbit operation on 64-bit machines with direct move.  Also allow
+   for the value to come from memory or if it is already loaded into a GPR.  */
+
+void
+rs6000_split_signbit (rtx dest, rtx src)
+{
+  machine_mode d_mode = GET_MODE (dest);
+  machine_mode s_mode = GET_MODE (src);
+  rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
+  rtx shift_reg = dest_di;
+
+  gcc_assert (REG_P (dest));
+  gcc_assert (REG_P (src) || MEM_P (src));
+  gcc_assert (s_mode == KFmode || s_mode == TFmode);
+
+  if (MEM_P (src))
+{
+  rtx mem = (WORDS_BIG_ENDIAN
+? adjust_address (src, DImode, 0)
+: adjust_address (src, DImode, 8));
+  emit_insn (gen_rtx_SET (dest_di, mem));
+}
+
+  else
+{
+  unsigned int r = REGNO (src);
+
+  /* If this is a VSX register, generate the special mfvsrd instruction
+to get it in a GPR.  Until we support SF and DF modes, that will
+ always be true.  */
+  gcc_assert (VSX_REGNO_P (r));
+
+  if (s_mode == KFmode)
+   emit_insn (gen_signbitkf2_dm2 (dest_di, src));
+  else
+   emit_insn (gen_signbittf2_dm2 (dest_di, src));
+}
+
+  emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
+  return;
+}
+
 /* A subroutine of the atomic operation splitters.  Jump to LABEL if
COND is true.  Mark the jump as unlikely to be taken.  */
 
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 237929)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -147,6 +147,7 @@
UNSPEC_ROUND_TO_ODD
UNSPEC_IEEE128_MOVE
UNSPEC_IEEE128_CONVERT
+   UNSPEC_SIGNBIT
   ])
 
 ;;
@@ -508,6 +509,13 @@
(IF "TARGET_FLOAT128")
(TF "TARGET_LONG_DOUBLE_128")])
 
+; Iterator for signbit on 64-bit machines with direct move
+(define_mode_iterator SIGNBIT [(KF "FLOAT128_VECTOR_P (KFmode)")
+  (TF "FLOAT128_VECTOR_P (TFmode)")])
+
+(define_mode_attr Fsignbit [(KF "wa")
+(TF "wa")])
+
 ; Iterator for ISA 3.0 supported floating point types
 (define_mode_iterator FP_ISA3 [SF
   DF
@@ -4567,7 +4575,7 @@
 ;; when little-endian.
 (define_expand "signbit2"
   [(set (match_dup 2)
-   (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "")))
+   

[PATCH] doc fix for c/71560 - union compound literal initializes wrong union field

2016-07-01 Thread Martin Sebor

The bug points out a couple of conformance problems in the GCC
manual where is discusses compound literals and casts to unions
and says that a compound literal is equivalent to a cast.  It
isn't because a compound literal is an lvalue but a cast yields
an rvalue.

The attached patch corrects this error and adds some clarifying
text to illustrate the differences.

Martin
PR c/71560 - union compound literal initializes wrong union field

gcc/ChangeLog:
2016-06-23  Martin Sebor  

	PR c/71560
	* doc/extend.texi (Compound Literals): Correct and clarify.
	(Cast to Union): Same.

Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi	(revision 237617)
+++ gcc/doc/extend.texi	(working copy)
@@ -1863,15 +1863,16 @@ foo (float f, float g)
 @cindex compound literals
 @c The GNU C name for what C99 calls compound literals was "constructor expressions".
 
-ISO C99 supports compound literals.  A compound literal looks like
-a cast containing an initializer.  Its value is an object of the
-type specified in the cast, containing the elements specified in
-the initializer; it is an lvalue.  As an extension, GCC supports
-compound literals in C90 mode and in C++, though the semantics are
-somewhat different in C++.
+A compound literal looks like a cast of a brace-enclosed aggregate
+initializer list.  Its value is an object of the type specified in
+the cast, containing the elements specified in the initializer.
+Unlike the result of a cast, a compound literal is an lvalue.  ISO
+C99 and later support compound literals.  As an extension, GCC
+supports compound literals also in C90 mode and in C++, although
+as explained below, the C++ semantics are somewhat different.
 
-Usually, the specified type is a structure.  Assume that
-@code{struct foo} and @code{structure} are declared as shown:
+Usually, the specified type of a compound literal is a structure.  Assume
+that @code{struct foo} and @code{structure} are declared as shown:
 
 @smallexample
 struct foo @{int a; char b[2];@} structure;
@@ -1905,18 +1906,23 @@ such an initializer, as shown here:
 char **foo = (char *[]) @{ "x", "y", "z" @};
 @end smallexample
 
-Compound literals for scalar types and union types are
-also allowed, but then the compound literal is equivalent
-to a cast.
+Compound literals for scalar types and union types are also allowed.  In
+the following example the variable @var{i} is initialized to the value
+@code{2}, the result of incrementing the unnamed object created by
+the compound literal.
 
+@smallexample
+int i = ++(int) @{ 1 @};
+@end smallexample
+
 As a GNU extension, GCC allows initialization of objects with static storage
-duration by compound literals (which is not possible in ISO C99, because
+duration by compound literals (which is not possible in ISO C99 because
 the initializer is not a constant).
-It is handled as if the object is initialized only with the bracket
-enclosed list if the types of the compound literal and the object match.
-The initializer list of the compound literal must be constant.
+It is handled as if the object were initialized only with the brace-enclosed
+list if the types of the compound literal and the object match.
+The elements of the compound literal must be constant.
 If the object being initialized has array type of unknown size, the size is
-determined by compound literal size.
+determined by the size of the compound literal.
 
 @smallexample
 static struct foo x = (struct foo) @{1, 'a', 'b'@};
@@ -1934,22 +1940,21 @@ static int z[] = @{1, 0, 0@};
 
 In C, a compound literal designates an unnamed object with static or
 automatic storage duration.  In C++, a compound literal designates a
-temporary object, which only lives until the end of its
-full-expression.  As a result, well-defined C code that takes the
-address of a subobject of a compound literal can be undefined in C++,
-so the C++ compiler rejects the conversion of a temporary array to a pointer.
-For instance, if the array compound literal example above appeared
-inside a function, any subsequent use of @samp{foo} in C++ has
-undefined behavior because the lifetime of the array ends after the
-declaration of @samp{foo}.  
+temporary object that only lives until the end of its full-expression.
+As a result, well-defined C code that takes the address of a subobject
+of a compound literal can be undefined in C++, so G++ rejects
+the conversion of a temporary array to a pointer.  For instance, if
+the array compound literal example above appeared inside a function,
+any subsequent use of @samp{foo} in C++ would have undefined behavior
+because the lifetime of the array ends after the declaration of @samp{foo}.
 
-As an optimization, the C++ compiler sometimes gives array compound
-literals longer lifetimes: when the array either appears outside a
-function or has const-qualified type.  If @samp{foo} and its
-initializer had elements of @samp{char *const} type 

[PATCH] simplify-rtx.c: start adding selftests

2016-07-01 Thread David Malcolm
This patch starts adding selftests to simplify-rtx.c, to ensure that
RTL expressions are simplified as we expect.

It adds a new ASSERT_RTX_EQ macro that checks for pointer equality
of two rtx values.  If they're non-equal, it aborts, printing both
expressions.  For example:

../../src/gcc/simplify-rtx.c:6354: test_binary: FAIL: ASSERT_RTL_EQ (b, 
simplify_rtx (gen_rtx_IOR (DImode, a, gen_rtx_AND (DImode, a, b
  expected=0x7ff38a32ab58:
(reg:DI 1 x1)
  actual=0x7ff38a32ab40:
(reg:DI 0 x0)

Successfully bootstrapped on x86_64-pc-linux-gnu

Successful -fself-test of stage1 on powerpc-ibm-aix7.1.3.0 (gcc111).

Successful -fself-test of gcc stage 1 on 197 out of 198 configurations in
contrib/config-list.mk (the 1 failure is an unrelated build failure).

OK for trunk?

gcc/ChangeLog:
* Makefile.in (OBJS): Add selftest-rtl.o.
* selftest-rtl.c: New file.
* selftest-run-tests.c (selftest::run_tests): Add call to
simplify_rtx_c_tests.
* selftest.c (selftest::begin_fail): New function.
(selftest::fail): Reimplement in terms of begin_fail.
(selftest::fail_formatted): Likewise.
* selftest.h (selftest::begin_fail): New declaration.
(selftest::assert_rtx_eq): New declaration.
(selftest::simplify_rtx_c_tests): New declaration.
(ASSERT_RTX_EQ): New macro.
* simplify-rtx.c: Include selftest.h.
(selftest::test_sign_bits): New function.
(selftest::test_unary): New function.
(selftest::test_binary): New function.
(selftest::test_ternary): New function.
(selftest::run_tests_for_mode): New function.
(selftest::simplify_rtx_c_tests): New function.
---
 gcc/Makefile.in  |   1 +
 gcc/selftest-rtl.c   |  55 +
 gcc/selftest-run-tests.c |   1 +
 gcc/selftest.c   |  17 +--
 gcc/selftest.h   |  22 +
 gcc/simplify-rtx.c   | 126 +++
 6 files changed, 218 insertions(+), 4 deletions(-)
 create mode 100644 gcc/selftest-rtl.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index ca7b1f6..de085b0 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1416,6 +1416,7 @@ OBJS = \
sel-sched-ir.o \
sel-sched-dump.o \
sel-sched.o \
+   selftest-rtl.o \
selftest-run-tests.o \
sese.o \
shrink-wrap.o \
diff --git a/gcc/selftest-rtl.c b/gcc/selftest-rtl.c
new file mode 100644
index 000..e44e720
--- /dev/null
+++ b/gcc/selftest-rtl.c
@@ -0,0 +1,55 @@
+/* Selftest support for RTL.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "selftest.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+
+#if CHECKING_P
+
+/* Implementation detail of ASSERT_RTX_EQ.  If val_expected and val_actual
+   fail pointer-equality, print the location, "FAIL: ", and print the
+   mismatching RTL expressions to stderr, then abort.  */
+
+void
+selftest::assert_rtx_eq (const location ,
+const char *desc_expected, const char *desc_actual,
+rtx val_expected, rtx val_actual)
+{
+  if (val_expected == val_actual)
+::selftest::pass (loc, "ASSERT_RTL_EQ");
+  else
+{
+  ::selftest::begin_fail (loc);
+  fprintf (stderr, "ASSERT_RTL_EQ (%s, %s)\n",
+  desc_expected, desc_actual);
+  fprintf (stderr, "  expected=%p:\n", (void *)val_expected);
+  print_rtl (stderr, val_expected);
+  fprintf (stderr, "\n  actual=%p:\n", (void *)val_actual);
+  print_rtl (stderr, val_actual);
+  fprintf (stderr, "\n");
+  abort ();
+   }
+}
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index bddf0b2..cd2883d 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -55,6 +55,7 @@ selftest::run_tests ()
   tree_c_tests ();
   gimple_c_tests ();
   rtl_tests_c_tests ();
+  simplify_rtx_c_tests ();
 
   /* Higher-level tests, or for components that other selftests don't
  rely on.  */
diff --git a/gcc/selftest.c b/gcc/selftest.c
index ed6e517..af2b3f6 100644
--- a/gcc/selftest.c
+++ b/gcc/selftest.c
@@ -26,6 +26,16 @@ along with GCC; see the file COPYING3.  If not see
 
 int selftest::num_passes;
 
+/* 

Re: [PATCH, rs6000] Use direct moves for __builtin_signbit for 128-bit floating-point

2016-07-01 Thread Michael Meissner
On Fri, Jul 01, 2016 at 04:37:35PM -0500, Bill Schmidt wrote:
> Hi Segher,
> 
> > On Jun 29, 2016, at 4:43 PM, Segher Boessenkool 
> >  wrote:
> > 
> > Why does this need -static-libgcc?
> 
> Mike, can you please respond to this?  I was curious about this also
> but neglected to ask you.

The original testing (some 2 years ago) would pick up the wrong libgcc, and
I've just kept using -static-libgcc make it work (it would pick up the host
libgcc which of course does not have the float128 emulation routines).  If
somebody wants to test that it now works, we can remove the flag.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH, rs6000] Use direct moves for __builtin_signbit for 128-bit floating-point

2016-07-01 Thread Bill Schmidt
Hi Segher,

> On Jun 29, 2016, at 4:43 PM, Segher Boessenkool  
> wrote:
> 
> Hi,
> 
> On Tue, Jun 28, 2016 at 04:44:08PM -0500, Bill Schmidt wrote:
>> +void
>> +rs6000_split_signbit (rtx dest, rtx src)
>> +{
>> +  machine_mode d_mode = GET_MODE (dest);
>> +  machine_mode s_mode = GET_MODE (src);
>> +  rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
>> +  rtx shift_reg = dest_di;
>> +
>> +  gcc_assert (REG_P (dest));
>> +  gcc_assert (REG_P (src) || MEM_P (src));
>> +
>> +  if (MEM_P (src))
>> +{
>> +  rtx mem;
>> +
>> +  if (s_mode == SFmode)
>> +mem = gen_rtx_SIGN_EXTEND (DImode, adjust_address (src, SImode, 0));
>> +
>> +  else if (GET_MODE_SIZE (s_mode) == 16 && !WORDS_BIG_ENDIAN)
>> +mem = adjust_address (src, DImode, 8);
>> +
>> +  else
>> +mem = adjust_address (src, DImode, 0);
> 
> else if (s_mode == DFmode)
>  ...
> else
>  gcc_unreachable ();
> 
> Or does this case catch some other modes, too?  Make those explicit, then?
> Or make everything based on size (not mode).
> 
> And put it in order of size if you make that change?

This actually has some leftover cruft in it from the original patch that
supported DF and SF modes.  I'll simplify it considerably.

> 
>> +  if (FLOAT128_IEEE_P (mode))
>> +{
>> +  if (mode == KFmode)
>> +{
>> +  emit_insn (gen_signbitkf2_dm (operands[0], operands[1]));
>> +  DONE;
>> +}
>> +  else if (mode == TFmode)
>> +{
>> +  emit_insn (gen_signbittf2_dm (operands[0], operands[1]));
>> +  DONE;
>> +}
>> +  else
>> +gcc_unreachable ();
>> +}
> 
> If you put a single DONE at the end here things will look simpler.
> 
> Why does the similar thing in rs6000.c construct the RTL by hand?  This
> way is safer.

Good points, will fix.

> 
>> --- gcc/testsuite/gcc.target/powerpc/signbit-1.c (revision 0)
>> +++ gcc/testsuite/gcc.target/powerpc/signbit-1.c (working copy)
>> @@ -0,0 +1,16 @@
>> +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
>> +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
> 
> Default args aren't necessary.
> 
>> --- gcc/testsuite/gcc.target/powerpc/signbit-3.c (revision 0)
>> +++ gcc/testsuite/gcc.target/powerpc/signbit-3.c (working copy)
>> @@ -0,0 +1,172 @@
>> +/* { dg-do run { target { powerpc*-*-linux* } } } */
>> +/* { dg-require-effective-target ppc_float128_sw } */
>> +/* { dg-options "-mcpu=power7 -O2 -mfloat128 -static-libgcc -lm" } */
> 
> Why does this need -static-libgcc?

Mike, can you please respond to this?  I was curious about this also
but neglected to ask you.

Thanks,
Bill

> 
> 
> Segher
> 



Re: [PATCH PR70729] The second part of patch.

2016-07-01 Thread H.J. Lu
On Thu, Jun 30, 2016 at 7:51 AM, Yuri Rumyantsev  wrote:
> Richard,
>
> Could you please review additional simple fix for 70729 - we need to
> nullify safelen field of loops containing simduid intrinsics like
> GOMP_SIMD_LANE (introduced  e.g. for private variables). I checked
> that this fix cures regression which was missed by me since AVX2
> machine is required for  libgomp.fortran/examples-4/simd-2.f90.
>
> Regression testing and bootstrapping did not show any new failures.
> Is it OK for trunk?
>
> Patch:
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 2669813..9fbd183 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -204,6 +204,10 @@ adjust_simduid_builtins (hash_table *htab)
>gcc_assert (TREE_CODE (arg) == SSA_NAME);
>simduid_to_vf *p = NULL, data;
>data.simduid = DECL_UID (SSA_NAME_VAR (arg));
> +  /* Need to nullify loop safelen field since it's value is not
> + valid after transformation.  */
> +  if (bb->loop_father && bb->loop_father->safelen > 0)
> +bb->loop_father->safelen = 0;
>if (htab)
>  {
>p = htab->find ();
>
> ChangeLog:
> 2016-06-30  Yuri Rumyantsev  
>
> PR tree-optimization/70729
> * tree-vectorizer.c (adjust_simduid_builtins): Nullify safelen field
> of loop since it can be not valid after transformation.
>

I still see

FAIL: libgomp.fortran/simd3.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/simd3.f90   -O3 -g  execution test
FAIL: libgomp.fortran/simd4.f90   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution
test
FAIL: libgomp.fortran/simd4.f90   -O3 -g  execution test

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71734

-- 
H.J.


[C PATCH] Fix -Wunused-but-set-* regression with vector indexing (PR c/71719)

2016-07-01 Thread Jakub Jelinek
Hi!

In r236630 we started using VCE for vector indexing, but
* expr.c (mark_exp_read): Handle VIEW_CONVERT_EXPR. 
   
has been changed in C++ FE only, not C FE, while it is needed
in C FE too as the following testcase shows.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-07-01  Jakub Jelinek  

PR c/71719
* c-typeck.c (mark_exp_read): Handle VIEW_CONVERT_EXPR.

* c-c++-common/Wunused-var-15.c: New test.

--- gcc/c/c-typeck.c.jj 2016-06-29 16:10:29.0 +0200
+++ gcc/c/c-typeck.c2016-07-01 16:30:55.756545761 +0200
@@ -1896,6 +1896,7 @@ mark_exp_read (tree exp)
 case IMAGPART_EXPR:
 CASE_CONVERT:
 case ADDR_EXPR:
+case VIEW_CONVERT_EXPR:
   mark_exp_read (TREE_OPERAND (exp, 0));
   break;
 case COMPOUND_EXPR:
--- gcc/testsuite/c-c++-common/Wunused-var-15.c.jj  2016-07-01 
16:39:39.639921566 +0200
+++ gcc/testsuite/c-c++-common/Wunused-var-15.c 2016-07-01 16:38:59.0 
+0200
@@ -0,0 +1,20 @@
+/* PR c/71719 */
+/* { dg-do compile } */
+/* { dg-options "-Wunused -W -Wno-psabi" } */
+
+typedef unsigned V __attribute__ ((vector_size (16)));
+
+void bar (unsigned);
+
+V x;
+
+void
+foo (V v)  /* { dg-bogus "set but not used" } */
+{
+  bar (v[0]);
+  V w = x; /* { dg-bogus "set but not used" } */
+  bar (w[1]);
+}
+
+/* Ignore a warning that is irrelevant to the purpose of this test.  */
+/* { dg-prune-output ".*GCC vector passed by reference.*" } */

Jakub


[PATCH] OpenACC routines in fortran modules

2016-07-01 Thread Cesar Philippidis
It turns out that the acc routine parallelism isn't being recorded in
fortran .mod files. This is a problem because then the ME can't validate
if a routine has compatible parallelism with the call site. This patch
does two things:

 1. Encode gang, worker, vector and seq level parallelism in module
files. This introduces a new oacc_function enum, which I ended
up using to record the parallelism of standalone acc routines too.

 2. Extends gfc_match_oacc_routine to add acc routine directive support
for intrinsic procedures such as abort.

Is this patch OK for trunk? I included support for intrinsic procedures
because it was necessary with my previous patch which treated all calls
to non-acc routines from within an OpenACC offloaded region as errors.
Now that it has been determined that those patches should be link time
errors, we technically don't need to add acc routine support for
intrinsic procedures. So I can drop that part of the patch if necessary.

Cesar
2016-07-01  Cesar Philippidis  

	gcc/fortran/
	* gfortran.h (enum oacc_function): Define.
	(oacc_function_type): Declare.
	(symbol_attribute): Change the type of oacc_function from unsigned
	to an ENUM_BITFIELD.
	* module.c (oacc_function): New DECL_MIO_NAME.
	(mio_symbol_attribute): Set the oacc_function attribute.
	* openmp.c (gfc_oacc_routine_dims): Change the return type from
	int to oacc_function.
	(gfc_match_oacc_routine): Handle intrinsic procedures.
	* symbol.c (oacc_function_types): Define.
	* trans-decl.c (add_attributes_to_decl): Update to handle the
	retyped oacc_function attribute.

	gcc/testsuite/
	* gfortran.dg/goacc/fixed-1.f: Add test coverage.
	* gfortran.dg/goacc/routine-7.f90: New test.

	libgomp/
	* testsuite/libgomp.oacc-fortran/abort-1.f90: Test acc routine
	on intrinsic abort.
	* testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise.
	* testsuite/libgomp.oacc-fortran/routine-7.f90: Likewise.

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 0bb71cb..fac94ca 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -303,6 +303,15 @@ enum save_state
 { SAVE_NONE = 0, SAVE_EXPLICIT, SAVE_IMPLICIT
 };
 
+/* Flags to keep track of ACC routine states.  */
+enum oacc_function
+{ OACC_FUNCTION_NONE = 0,
+  OACC_FUNCTION_SEQ,
+  OACC_FUNCTION_GANG,
+  OACC_FUNCTION_WORKER,
+  OACC_FUNCTION_VECTOR
+};
+
 /* Strings for all symbol attributes.  We use these for dumping the
parse tree, in error messages, and also when reading and writing
modules.  In symbol.c.  */
@@ -312,6 +321,7 @@ extern const mstring intents[];
 extern const mstring access_types[];
 extern const mstring ifsrc_types[];
 extern const mstring save_status[];
+extern const mstring oacc_function_types[];
 
 /* Enumeration of all the generic intrinsic functions.  Used by the
backend for identification of a function.  */
@@ -862,7 +872,7 @@ typedef struct
   unsigned oacc_declare_link:1;
 
   /* This is an OpenACC acclerator function at level N - 1  */
-  unsigned oacc_function:3;
+  ENUM_BITFIELD (oacc_function) oacc_function:3;
 
   /* Attributes set by compiler extensions (!GCC$ ATTRIBUTES).  */
   unsigned ext_attr:EXT_ATTR_NUM;
diff --git a/gcc/fortran/module.c b/gcc/fortran/module.c
index 4d664f0..267858f 100644
--- a/gcc/fortran/module.c
+++ b/gcc/fortran/module.c
@@ -2095,6 +2095,7 @@ DECL_MIO_NAME (procedure_type)
 DECL_MIO_NAME (ref_type)
 DECL_MIO_NAME (sym_flavor)
 DECL_MIO_NAME (sym_intent)
+DECL_MIO_NAME (oacc_function)
 #undef DECL_MIO_NAME
 
 /* Symbol attributes are stored in list with the first three elements
@@ -2116,6 +2117,8 @@ mio_symbol_attribute (symbol_attribute *attr)
   attr->proc = MIO_NAME (procedure_type) (attr->proc, procedures);
   attr->if_source = MIO_NAME (ifsrc) (attr->if_source, ifsrc_types);
   attr->save = MIO_NAME (save_state) (attr->save, save_status);
+  attr->oacc_function = MIO_NAME (oacc_function) (attr->oacc_function,
+		  oacc_function_types);
 
   ext_attr = attr->ext_attr;
   mio_integer ((int *) _attr);
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 865e0d9..10b880c 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -1714,21 +1714,31 @@ gfc_match_oacc_cache (void)
 
 /* Determine the loop level for a routine.   */
 
-static int
+static oacc_function
 gfc_oacc_routine_dims (gfc_omp_clauses *clauses)
 {
   int level = -1;
+  oacc_function ret = OACC_FUNCTION_SEQ;
 
   if (clauses)
 {
   unsigned mask = 0;
 
   if (clauses->gang)
-	level = GOMP_DIM_GANG, mask |= GOMP_DIM_MASK (level);
+	{
+	  level = GOMP_DIM_GANG, mask |= GOMP_DIM_MASK (level);
+	  ret = OACC_FUNCTION_GANG;
+	}
   if (clauses->worker)
-	level = GOMP_DIM_WORKER, mask |= GOMP_DIM_MASK (level);
+	{
+	  level = GOMP_DIM_WORKER, mask |= GOMP_DIM_MASK (level);
+	  ret = OACC_FUNCTION_WORKER;
+	}
   if (clauses->vector)
-	level = GOMP_DIM_VECTOR, mask |= GOMP_DIM_MASK (level);
+	{
+	  level = GOMP_DIM_VECTOR, mask |= GOMP_DIM_MASK 

PING^6: [PATCH] PR target/70454: Build x86 libgomp with -march=i486 or better

2016-07-01 Thread H.J. Lu
On Tue, Jun 7, 2016 at 9:03 AM, H.J. Lu  wrote:
> On Sun, May 29, 2016 at 3:14 PM, H.J. Lu  wrote:
>> On Fri, May 20, 2016 at 8:04 AM, H.J. Lu  wrote:
>>> On Mon, May 9, 2016 at 5:52 AM, H.J. Lu  wrote:
 On Mon, May 2, 2016 at 6:46 AM, H.J. Lu  wrote:
> On Mon, Apr 25, 2016 at 1:36 PM, H.J. Lu  wrote:
>> If x86 libgomp isn't compiled with -march=i486 or better, append
>> -march=i486 XCFLAGS for x86 libgomp build.
>>
>> Tested on i686 with and without --with-arch=i386.  Tested on
>> x86-64 with and without --with-arch_32=i386.  OK for trunk?
>>
>>
>> H.J.
>> ---
>> PR target/70454
>> * configure.tgt (XCFLAGS): Append -march=i486 to compile x86
>> libgomp if needed.
>> ---
>>  libgomp/configure.tgt | 36 
>>  1 file changed, 16 insertions(+), 20 deletions(-)
>>
>> diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
>> index 77e73f0..c876e80 100644
>> --- a/libgomp/configure.tgt
>> +++ b/libgomp/configure.tgt
>> @@ -67,28 +67,24 @@ if test x$enable_linux_futex = xyes; then
>> ;;
>>
>>  # Note that bare i386 is not included here.  We need cmpxchg.
>> -i[456]86-*-linux*)
>> +i[456]86-*-linux* | x86_64-*-linux*)
>> config_path="linux/x86 linux posix"
>> -   case " ${CC} ${CFLAGS} " in
>> - *" -m64 "*|*" -mx32 "*)
>> -   ;;
>> - *)
>> -   if test -z "$with_arch"; then
>> - XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
>> +   # Need i486 or better.
>> +   cat > conftestx.c <> +#if defined __x86_64__ || defined __i486__ || defined __pentium__ \
>> +  || defined __pentiumpro__ || defined __pentium4__ \
>> +  || defined __geode__ || defined __SSE__
>> +# error Need i486 or better
>> +#endif
>> +EOF
>> +   if ${CC} ${CFLAGS} -c -o conftestx.o conftestx.c > /dev/null 
>> 2>&1; then
>> +   if test "${target_cpu}" = x86_64; then
>> +   XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
>> +   else
>> +   XCFLAGS="${XCFLAGS} -march=i486 -mtune=${target_cpu}"
>> fi
>> -   esac
>> -   ;;
>> -
>> -# Similar jiggery-pokery for x86_64 multilibs, except here we
>> -# can't rely on the --with-arch configure option, since that
>> -# applies to the 64-bit side.
>> -x86_64-*-linux*)
>> -   config_path="linux/x86 linux posix"
>> -   case " ${CC} ${CFLAGS} " in
>> - *" -m32 "*)
>> -   XCFLAGS="${XCFLAGS} -march=i486 -mtune=generic"
>> -   ;;
>> -   esac
>> +   fi
>> +   rm -f conftestx.c conftestx.o
>> ;;
>>
>>  # Note that sparcv7 and sparcv8 is not included here.  We need cas.
>> --
>> 2.5.5
>>
>
> PING.
>

 PING.

>>>
>>> PING.
>>>
>>
>> PING.
>
> PING.
>

PING.

-- 
H.J.


Re: [PATCHv3, testsuite] Enable some existing __float128 tests for powerpc

2016-07-01 Thread Bill Schmidt

> On Jul 1, 2016, at 2:49 PM, Joseph Myers  wrote:
> 
> On Fri, 1 Jul 2016, Bill Schmidt wrote:
> 
>> +/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
> 
> I still think you should be using { dg-add-options float128 }, via 
> defining add_options_for_float128, rather than hardcoding these options 
> in individual tests.

Sorry, I seem to have missed that suggestion along the way.  Too
many balls in the air this week.  I'll look into it.

Bill

> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com
> 



Re: [PATCHv3, testsuite] Enable some existing __float128 tests for powerpc

2016-07-01 Thread David Edelsohn
On Fri, Jul 1, 2016 at 3:39 PM, Bill Schmidt
 wrote:
> Hi,
>
> This changes my previous patch to use a separate base_quadfloat_support
> effective target that can be used in conjunction with either the float128
> or __float128 effective targets, and modifies the tests accordingly.
> I dropped adding support for float128-exact-underflow.c for now; that can
> be addressed later if desired.
>
> Tested on powerpc64[le]-unknown-linux-gnu.  Is this ok for trunk, and
> eventual backport to gcc-6-branch?
>
> Thanks,
> Bill
>
>
> 2016-07-01  Bill Schmidt  
>
> * gcc.dg/const-float128-ped.c: Require __float128 effective
> target, and specify additional options for powerpc*-*-*.
> * gcc.dg/const-float128.c: Likewise.
> * gcc.dg/torture/float128-cmp-invalid.c: Require
> __float128 and base_quadfloat_support effective targets, and
> specify additional options for powerpc*-*-*.
> * gcc.dg/torture/float128-div-underflow.c: Likewise.
> * gcc.dg/torture/float128-extend-nan.c: Require
> __float128 and base-quadfloat_support effective targets, and
> specify additional options for powerpc*-*-*.
> * gcc.dg/torture/float128-nan.c: Likewise.
> * gcc.dg/torture/fp-int-convert-float128-timode-2.c: Likewise.
> * gcc.dg/torture/fp-int-convert-float128-timode-3.c: Likewise.
> * gcc.dg/torture/fp-int-convert-float128-timode.c: Likewise.
> * lib/target-supports.exp (check_effective_target___float128):
> New.
> (check_effective_target_base_quadword_support): New.

This is okay with the dg-add-options change that Joseph recommends.

Thanks, David


Re: [PATCHv3, testsuite] Enable some existing __float128 tests for powerpc

2016-07-01 Thread Joseph Myers
On Fri, 1 Jul 2016, Bill Schmidt wrote:

> +/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */

I still think you should be using { dg-add-options float128 }, via 
defining add_options_for_float128, rather than hardcoding these options 
in individual tests.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCHv3, testsuite] Enable some existing __float128 tests for powerpc

2016-07-01 Thread Bill Schmidt
Hi,

This changes my previous patch to use a separate base_quadfloat_support
effective target that can be used in conjunction with either the float128
or __float128 effective targets, and modifies the tests accordingly.
I dropped adding support for float128-exact-underflow.c for now; that can
be addressed later if desired.

Tested on powerpc64[le]-unknown-linux-gnu.  Is this ok for trunk, and
eventual backport to gcc-6-branch?

Thanks,
Bill


2016-07-01  Bill Schmidt  

* gcc.dg/const-float128-ped.c: Require __float128 effective
target, and specify additional options for powerpc*-*-*.
* gcc.dg/const-float128.c: Likewise.
* gcc.dg/torture/float128-cmp-invalid.c: Require
__float128 and base_quadfloat_support effective targets, and
specify additional options for powerpc*-*-*.
* gcc.dg/torture/float128-div-underflow.c: Likewise.
* gcc.dg/torture/float128-extend-nan.c: Require
__float128 and base-quadfloat_support effective targets, and
specify additional options for powerpc*-*-*.
* gcc.dg/torture/float128-nan.c: Likewise.
* gcc.dg/torture/fp-int-convert-float128-timode-2.c: Likewise.
* gcc.dg/torture/fp-int-convert-float128-timode-3.c: Likewise.
* gcc.dg/torture/fp-int-convert-float128-timode.c: Likewise.
* lib/target-supports.exp (check_effective_target___float128):
New.
(check_effective_target_base_quadword_support): New.


Index: gcc/testsuite/gcc.dg/const-float128-ped.c
===
--- gcc/testsuite/gcc.dg/const-float128-ped.c   (revision 237802)
+++ gcc/testsuite/gcc.dg/const-float128-ped.c   (working copy)
@@ -1,5 +1,7 @@
 /* Test 'q' suffix with -pedantic on __float128 type constants.  */
-/* { dg-do compile { target ia64-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-do compile } */
+/* { dg-require-effective-target __float128 } */
 /* { dg-options "-pedantic" } */
+/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
 
 __float128 a = 123.456789q; /* { dg-warning "non-standard suffix on floating 
constant" } */
Index: gcc/testsuite/gcc.dg/const-float128.c
===
--- gcc/testsuite/gcc.dg/const-float128.c   (revision 237802)
+++ gcc/testsuite/gcc.dg/const-float128.c   (working copy)
@@ -1,6 +1,8 @@
 /* Test 'q' and 'Q' suffixes on __float128 type constants.  */
-/* { dg-do compile { target ia64-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-do compile } */
+/* { dg-require-effective-target __float128 } */
 /* { dg-options "" } */
+/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
 
 __float128 a = 123.456789q;
 __float128 b = 123.456789Q;
Index: gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c
===
--- gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c (revision 237802)
+++ gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c (working copy)
@@ -1,7 +1,10 @@
 /* Test for "invalid" exceptions from __float128 comparisons.  */
-/* { dg-do run { target i?86-*-* x86_64-*-* ia64-*-* } } */
+/* { dg-do run } */
 /* { dg-options "" } */
+/* { dg-require-effective-target __float128 } */
+/* { dg-require-effective-target base_quadfloat_support } */
 /* { dg-require-effective-target fenv_exceptions } */
+/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
 
 #include 
 #include 
Index: gcc/testsuite/gcc.dg/torture/float128-div-underflow.c
===
--- gcc/testsuite/gcc.dg/torture/float128-div-underflow.c   (revision 
237802)
+++ gcc/testsuite/gcc.dg/torture/float128-div-underflow.c   (working copy)
@@ -1,7 +1,10 @@
 /* Test for spurious underflow from __float128 division.  */
-/* { dg-do run { target i?86-*-* x86_64-*-* ia64-*-* } } */
+/* { dg-do run } */
 /* { dg-options "" } */
+/* { dg-require-effective-target __float128 } */
+/* { dg-require-effective-target base_quadfloat_support } */
 /* { dg-require-effective-target fenv_exceptions } */
+/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
 
 #include 
 #include 
Index: gcc/testsuite/gcc.dg/torture/float128-extend-nan.c
===
--- gcc/testsuite/gcc.dg/torture/float128-extend-nan.c  (revision 237802)
+++ gcc/testsuite/gcc.dg/torture/float128-extend-nan.c  (working copy)
@@ -1,7 +1,10 @@
 /* Test extensions to __float128 quiet signaling NaNs.  */
-/* { dg-do run { target i?86-*-* x86_64-*-* ia64-*-* } } */
+/* { dg-do run } */
 /* { dg-options "-fsignaling-nans" } */
+/* { dg-require-effective-target __float128 } */
+/* { dg-require-effective-target base_quadfloat_support } */
 /* { dg-require-effective-target fenv_exceptions } */
+/* { dg-additional-options "-mfloat128 -mvsx" { target powerpc*-*-* } } */
 

Re: [PATCH][ARM] -mpure-code option for ARM

2016-07-01 Thread Sandra Loosemore

On 06/30/2016 08:32 AM, Andre Vieira (lists) wrote:


@@ -14498,6 +14499,14 @@ Print CPU tuning information as comment in assembler 
file.  This is
 an option used only for regression testing of the compiler and not
 intended for ordinary use in compiling code.  This option is disabled
 by default.
+
+@item -mpure-code
+@opindex mpure-code
+GCC will generate assembly where instructions and data are in separate 
sections.


s/will generate/generates/


+It also gives all text sections the ELF processor-specific section attribute
+SHF_ARM_PURECODE.  This option is only available for ELF targets in thumb mode
+that support Thumb2.


The proper spellings are "Thumb" and "Thumb-2", but wouldn't it be more 
clear just to say  "This option is only available when generating 
Thumb-2 code for ELF targets." ??



diff --git a/gcc/target.def b/gcc/target.def
index 
a4df363698ce776b51d11c187baed2069ba88a52..514db908d33b6ae77b33772e1d92fc25dc760c92
 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -432,6 +432,16 @@ this section is associated.",
  void, (const char *name, unsigned int flags, tree decl),
  default_no_named_section)

+/* Tell assembler what section attributes to assign this elf section
+   declaration, using their numerical value.  */
+DEFHOOK
+(elf_flags_numeric,
+ "If this function returns true, it will pass the section attributes using\n\
+their numeric value in @var{num} based on the bits set in @var{flags}.  This\n\
+function is called by @code{default_elf_asm_named_section}.",
+ bool, (unsigned int flags, unsigned int *num),
+ default_asm_elf_flags_numeric)
+


I don't understand "will pass" means in this context, even with the 
additional description you included in your patch e-mail.  I think it 
would be more helpful to communicate the purpose of this hook if you 
could rethink this as "If the target needs to , this hook should 
be implemented to ."  And again, please document current behavior 
in the present tense, not the future tense.


-Sandra



[PATCH] - improve sprintf buffer overflow detection (middle-end/49905)

2016-07-01 Thread Martin Sebor

The attached patch enhances compile-time checking for buffer overflow
and output truncation in non-trivial calls to the sprintf family of
functions under a new option -Wformat-length=[12].  This initial
patch handles printf directives with string, integer, and simple
floating arguments but eventually I'd like to extend it all other
functions and directives for which it makes sense.

I made some choices in the implementation that resulted in trade-offs
in the quality of the diagnostics.  I would be grateful for comments
and suggestions how to improve them.  Besides the list I include
Jakub who already gave me some feedback (thanks), Joseph who as
I understand has deep knowledge of the c-format.c code, and Richard
for his input on the LTO concern below.

1) Making use of -Wformat machinery in c-family/c-format.c.  This
   seemed preferable to duplicating some of the same code elsewhere
   (I initially started implementing it in expand_builtin in
   builtins.c).  It makes the implementation readily extensible
   to all the same formats as those already handled for -Wformat.
   One drawback is that unlike in expand_builtin, calls to these
   functions cannot readily be folded.  Another drawback pointed
   out by Jakub is that since the code is only available in the
   C and C++ compilers, it apparently may not be available with
   an LTO compiler (I don't completely understand this problem
   but I mention it in the interest of full disclosure). In light
   of the dependency in (2) below, I don't see a way to avoid it
   (moving c-format.c to the middle end was suggested but seemed
   like too much of a change to me).

2) Optimization.
   In keeping with the other -Wformat options, the checking is
   enabled without optimization.  Especially at level 2, the
   warnings can be useful even without it.  But to make buffer
   sizes and non-constant argument values available in calls to
   functions like sprintf (via __builtin_object_size) better
   results are obtained with optimization.

3) Truncation warnings.
   Although calls to bounded functions like snprintf aren't subject
   to buffer overflow, they can be subject to accidental truncation
   when the destination buffer isn't sized appropriately.  With the
   patch, such calls are diagnosed under the same option, but I
   wonder if have a separate warning option for them might be
   preferable (e.g., -Wformat-trunc=[01] or something like that).
   Independently, it might be useful to differentiate between
   truncating calls that check the return value and those that
   don't.

Besides the usual testing I compiled several packages with the
warning.  If found a few bugs in boundary cases in Binutils that
are being fixed.

Thanks
Martin

PS There are a few FIXME notes in the patch that I will either
fix or remove, depending on feedback, before committing the
patch.
PR middle-end/49905 - Better sanity checking on sprintf src & dest
  to produce warning for dodgy code

gcc/c/ChangeLog:
2016-07-01  Martin Sebor  

	PR middle-end/49905
	* c-lang.c (LANG_HOOKS_CHECK_FORMAT_LENGTH): Define.
	* c-typeck.c (check_function_arguments): Add an argument.

gcc/c-family/ChangeLog:
2016-07-01  Martin Sebor  

	PR middle-end/49905
	* c-common.c (check_function_arguments): Add an argument and pass
	it to check_function_format.
	* c-common.h (check_function_arguments): Add an argument.
	* c-format.c (function_format_info): Add new members.
	(conversion_spec): New struct.
	(check_format_length, ilog, tree_ilog, format_integer, format_floating,
	format_string, bytes_remaining): New functions.
	(print_char_table): Add initializers.
	(format_check_results): Add members.
	(check_function_format): Add an argument.
	(check_format_info): Add an argument.
	(check_format_info_main): Track the length of the formatted output
	and diagnose buffer overflow and truncation.
	(check_format_length): Handle -Wformat-length.
	(format_type_warning): Avoid bogus warnings when called during
	expansion.
	* c-format.h (format_char_info): Add members.
	* c.opt (-Wformat-legth=): Add new option.

gcc/cp/ChangeLog:
2016-07-01  Martin Sebor  

	PR middle-end/49905
	* call.c (build_over_call): Pass function decl to
	check_function_arguments.
	* typeck.c (cp_build_function_call_vec): Same.
	* cp/cp-lang.c: Define LANG_HOOKS_CHECK_FORMAT_LENGTH.

gcc/testsuite/ChangeLog:
2016-07-01  Martin Sebor  

	PR middle-end/49905
	* gcc.dg/format/c99-sprintf-length-1.c: New test.
	* gcc.dg/format/c99-sprintf-length-2.c: New test.
	* gcc.dg/format/c99-sprintf-length-opt.c: New test.

gcc/ChangeLog:
2016-07-01  Martin Sebor  

	PR middle-end/49905
	* builtins.c (expand_builtin): Call maybe_emit_snprintf_trunc_warning.
	Avoid issuing a duplicate warning already issued by -Wformat-length.
	(maybe_emit_sprintf_chk_warning): Call maybe_emit_snprintf_trunc_warning.
	(maybe_emit_snprintf_trunc_warning): New function.
	* 

Re: [PATCH, rs6000] Fix PR target 71656, reload ICE when -mcpu=power9 -mpower9-dform-vector

2016-07-01 Thread Peter Bergner

On 6/27/16 8:30 PM, Peter Bergner wrote:

On 6/27/16 3:21 PM, Segher Boessenkool wrote:

On Sat, Jun 25, 2016 at 07:14:01PM -0500, Peter Bergner wrote:
Okay for trunk, okay for 6 later.  One comment:


+  if (VECTOR_MODE_P (mode)
+  && !mode_supports_vsx_dform_quad (mode))
+return false;

   if (GET_CODE (addr) != PLUS)
 return false;

   op0 = XEXP (addr, 0);
-  if (!base_reg_operand (op0, Pmode))
+  if (!REG_P (op0)
+  || !INT_REG_OK_FOR_BASE_P (op0, strict))
 return false;


Just put these short conditionals on one line each?  It looks silly ;-)


Ok, committed to trunk with that change.  Thanks!


...and now committed to the FSF 6 branch.  Thanks.

Peter





Re: [RFC] Change order of Fortran BLOCK_VARS (PR fortran/71687, take 2)

2016-07-01 Thread Mikael Morin

Le 01/07/2016 16:58, Jakub Jelinek a écrit :

On Thu, Jun 30, 2016 at 08:06:54PM +0200, Jakub Jelinek wrote:

So, is it intentional that pushdecl / getdecls acts like a LIFO?
Though, it seems user vars are pushdecled in the reverse order of
declarations, but gfc_add_decl_to_function is called in the user declared
order, so perhaps for the following patch we'd also need to
decl = nreverse (saved_function_decls);
in gfc_generate_function_code and similarly in gfc_process_block_locals
decl = nreverse (saved_local_decls);


Here is an updated patch with the above mentioned two calls to nreverse
added.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


Hello,

I'm a bit surprised that it has worked so well so far.
We should probably switch at some point to using a vec to collect decls 
without headache.  In the mean time, the patch is OK.  Thanks.


Mikael



Re: [PATCH 2/4] PR c++/62314: add fixit hint for "expected ';' after class definition"

2016-07-01 Thread David Malcolm
On Mon, 2016-05-02 at 12:40 +0200, Bernd Schmidt wrote:
> On 04/28/2016 04:28 PM, David Malcolm wrote:
> > whereas clang reportedly emits:
> > 
> > test.c:2:12: error: expected ';' after struct
> >   struct a {}
> >  ^
> >  ;
> > 
> > (note the offset of the location, and the fix-it hint)
> > 
> > The following patch gives us the latter, more readable output.
> 
> Huh. Only the non-C++ parts remain to be reviewed, and I have no 
> technical objections, but do people really want this? To me that
> looks 
> like unnecessary visual clutter that eats up vertical space for no 
> reason. I know what a semicolon looks like without the compiler
> telling 
> me twice.

My own opinion is that it's worth spending the extra line to get the
semicolon under the caret, as (IMHO) it makes things slightly clearer.

A better argument is that as of r237712 we now have -fdiagnostics
-parseable-fixits.  This allows for an IDE to offer to automatically
apply a fix-it hint.  Hence by providing a fix-it here, an IDE can
potentially insert the semicolon itself:

$ ./xgcc -B. ../../src/gcc/testsuite/g++.dg/pr62314-2.C \
   -fdiagnostics-parseable-fixits

../../src/gcc/testsuite/g++.dg/pr62314-2.C:4:11: error: expected ‘;’ after 
class definition
 class a {} // { dg-error "11: expected .;. after class definition" }
   ^
   ;
fix-it:"../../src/gcc/testsuite/g++.dg/pr62314-2.C":{4:11-4:11}:";"
../../src/gcc/testsuite/g++.dg/pr62314-2.C:8:2: error: expected ‘;’ after 
struct definition
 } // { dg-error "2: expected .;. after struct definition" }
  ^
  ;
fix-it:"../../src/gcc/testsuite/g++.dg/pr62314-2.C":{8:2-8:2}:";"


I believe that on building this within a sufficiently recent version of
Xcode that Xcode can offer to insert the semicolon directly.
I'm hoping someone implements this for Emacs.

In that light, is the patch OK?

Thanks
Dave



Re: [PATCH, rs6000] Fix PR target/71698, ICE in reload copying TDmode values to GPR regs

2016-07-01 Thread Peter Bergner

On 6/30/16 6:21 PM, Segher Boessenkool wrote:

On Thu, Jun 30, 2016 at 05:55:04PM -0500, Peter Bergner wrote:

We currently don't allow TDmode values to use direct moves, since they
live in register pairs and the most significant word is always in the
even-numbered register which does not match subreg ordering in little
endian mode.  The following patch fixes PR71698 by disallowing reload
from using direct moves for TDmode values.

This passed bootstrap and regtesting with no regressions. Ok for trunk?


Yes, this is okay.  Thanks!


This is also broken on the FSF 6 branch, so is this ok there too after
bootstrap and regtesting there?


Okay.


Committed to trunk and the FSF 6 branch after bootstrap and regtesting
there showed no regressions.  Thanks!

Peter




Re: Improve insert/emplace robustness to self insertion

2016-07-01 Thread Jonathan Wakely

On 01/07/16 10:54 +0100, Jonathan Wakely wrote:

On 30/06/16 21:51 +0200, François Dumont wrote:

On 29/06/2016 23:30, Jonathan Wakely wrote:


   iterator
   insert(const_iterator __position, value_type&& __x)
   { return emplace(__position, std::move(__x)); }

That's suboptimal, since in the general case we need an extra
construction for emplacing, but we know that we don't need to do that
when inserting rvalues.


  Why not ? I realized with your remarks that I was missing some 
tests in the new self_insert.cc. The ones to insert an rvalue coming 
from the vector itself. In the attached patch there is those 2 
tests, do you agree with expected behavior ? For the moment it 
doesn't check that the source value has been indeed moved cause it 
doesn't, I will update it once it does.


No, I don't agree, because this is undefined behaviour:

 vv.insert(vv.begin(), std::move(vv[0]));

We don't need to support that case.

17.6.4.9 [res.on.arguments] says:

— If a function argument binds to an rvalue reference parameter, the
implementation may assume that this parameter is a unique reference
to this argument.

i.e. when passed an rvalue we can assume it is not a reference to
something in the container.

That's why we should not perform any more operations when inserting
rvalues than we do now. Any increase in copies/moves for inserting
rvalues is a regression, and should be avoided.



Here's what I'm planning to commit soon. This also fixes a problem
where the temporaries we create are not constructed+destroyed with the
allocator, which they must be.


commit 6012a80b33eab6501cd620fb710f9805d21cc4b3
Author: Jonathan Wakely 
Date:   Fri Jul 1 16:49:15 2016 +0100

Add tests for inserting aliased objects into std::vector

2016-07-01  Fran??ois Dumont  

	* testsuite/23_containers/vector/modifiers/emplace/self_emplace.cc:
	New test.
	* testsuite/23_containers/vector/modifiers/insert/self_insert.cc: New
	test.

diff --git a/libstdc++-v3/testsuite/23_containers/vector/modifiers/emplace/self_emplace.cc b/libstdc++-v3/testsuite/23_containers/vector/modifiers/emplace/self_emplace.cc
new file mode 100644
index 000..d452b5b
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/vector/modifiers/emplace/self_emplace.cc
@@ -0,0 +1,144 @@
+// { dg-options "-std=gnu++11" }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+#include 
+#include "testsuite_hooks.h"
+
+bool test __attribute__((unused)) = true;
+
+void
+test01()
+{
+  std::vector vv =
+{
+  { 2, 3 },
+  { 4, 5 },
+  { 0, 1 }
+};
+
+  // Make sure emplace will imply reallocation.
+  VERIFY( vv.capacity() == 3 );
+
+  vv.emplace(vv.begin(), vv[0]);
+
+  VERIFY( vv.size() == 4 );
+  VERIFY( vv[0].size() == 2 );
+  VERIFY( vv[0][0] == 2 );
+  VERIFY( vv[0][1] == 3 );
+}
+
+void
+test02()
+{
+  std::vector vv =
+{
+  { 2, 3 },
+  { 4, 5 },
+  { 0, 1 }
+};
+
+  // Make sure emplace won't reallocate.
+  vv.reserve(4);
+  vv.emplace(vv.begin(), vv[0]);
+
+  VERIFY( vv.size() == 4 );
+  VERIFY( vv[0].size() == 2 );
+  VERIFY( vv[0][0] == 2 );
+  VERIFY( vv[0][1] == 3 );
+}
+
+struct A
+{
+  A(int i) : _i(i)
+  { }
+
+  A(const A& other) : _i(other._i)
+  {
+VERIFY( other._i >= 0 );
+  }
+
+  A(A&& other) : _i(other._i)
+  {
+VERIFY( other._i >= 0 );
+
+other._i = -1;
+  }
+
+  A(std::vector::iterator it) : _i(it->_i)
+  {
+VERIFY( it->_i >= 0 );
+  }
+
+  A& operator=(const A&) = default;
+  A& operator=(A&& other)
+  {
+VERIFY(other._i >= 0 );
+
+_i = other._i;
+other._i = -1;
+return *this;
+  }
+
+  int _i;
+};
+
+void
+test03()
+{
+  std::vector va =
+{
+  { A(1) },
+  { A(2) },
+  { A(3) }
+};
+
+  // Make sure emplace will imply reallocation.
+  VERIFY( va.capacity() == 3 );
+
+  va.emplace(va.begin(), va.begin());
+
+  VERIFY( va.size() == 4 );
+  VERIFY( va[0]._i == 1 );
+}
+
+void
+test04()
+{
+  std::vector va =
+{
+  { A(1) },
+  { A(2) },
+  { A(3) }
+};
+
+  // Make sure emplace won't reallocate.
+  va.reserve(4);
+  va.emplace(va.begin(), va.begin());
+
+  VERIFY( va.size() == 4 );
+  VERIFY( va[0]._i == 1 );
+}
+
+int main()
+{

Re: [PATCH] fix interaction of -S and -x {c,c++}-header

2016-07-01 Thread Jan Beulich
>>> On 01.07.16 at 17:17,  wrote:
> On 07/01/2016 10:22 AM, Jan Beulich wrote:
>> Irrespective of the use of -o this so far resulted in "error: output
>> filename specified twice", since cc1_options already produces a -o
>> option when -S was specified.
>>
>> gcc/
>> 2016-07-01  Jan Beulich  
>>
>>  * varasm.c (get_variable_section): Validate initializer in
>>  named .bss-like sections.
> 
> Ok with the right ChangeLog.

Oops, I'm sorry for that. This is what it was supposed to be:

gcc/
2016-07-01  Jan Beulich  

* gcc.c (default_compilers["@c-header"]): Conditionalize "-o".

gcc/cp/
2016-07-01  Jan Beulich  

* lang-specs.h ("@c++-header"): Conditionalize "-o".

gcc/testsuite/
2016-07-01  Jan Beulich  

* g++.dg/header.c: New.
* gcc.dg/header.c: New.

Jan



[C++ PATCH] Fix ICE with PTRMEM_CST (PR c++/70869)

2016-07-01 Thread Jakub Jelinek
Hi!

As mentioned in the PR, we ICE on these testcases because PTRMEM_CST for
non-static var DECL_INITIAL is now supposed to be replaced during
genericization, but for some artifical vars the initializers are actually
never genericized.

For user variables, the VAR_DECLs should appear in BIND_EXPR_VARS and
walk_tree walks those when walking the corresponding BIND_EXPR.
So I think walking the initializers for non-artificial vars is a waste of
time, though genericization is using a pointer-set and thus shouldn't walk
anything multiple times, so if you think dropping the DECL_ARTIFICIAL
is desirable, I can try to test that.
The walking is only done on DECL_EXPR.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/6.2?

2016-07-01  Jakub Jelinek  
Kai Tietz  

PR c++/70869
PR c++/71054
* cp-gimplify.c (cp_genericize_r): For DECL_EXPR for non-static
artificial vars, genericize their initializers.

* g++.dg/cpp0x/pr70869.C: New test.
* g++.dg/cpp0x/pr71054.C: New test.

--- gcc/cp/cp-gimplify.c.jj 2016-06-15 09:17:22.0 +0200
+++ gcc/cp/cp-gimplify.c2016-07-01 14:36:16.222764199 +0200
@@ -1304,7 +1304,15 @@ cp_genericize_r (tree *stmt_p, int *walk
 {
   tree d = DECL_EXPR_DECL (stmt);
   if (TREE_CODE (d) == VAR_DECL)
-   gcc_assert (CP_DECL_THREAD_LOCAL_P (d) == DECL_THREAD_LOCAL_P (d));
+   {
+ gcc_assert (CP_DECL_THREAD_LOCAL_P (d) == DECL_THREAD_LOCAL_P (d));
+ /* User var initializers should be genericized during containing
+BIND_EXPR genericization when walk_tree walks DECL_INITIAL
+of BIND_EXPR_VARS.  Artificial temporaries might not be
+mentioned there though, so walk them now.  */
+ if (DECL_ARTIFICIAL (d) && !TREE_STATIC (d) && DECL_INITIAL (d))
+   cp_walk_tree (_INITIAL (d), cp_genericize_r, data, NULL);
+   }
 }
   else if (TREE_CODE (stmt) == OMP_PARALLEL
   || TREE_CODE (stmt) == OMP_TASK
--- gcc/testsuite/g++.dg/cpp0x/pr70869.C.jj 2016-07-01 14:45:47.737806235 
+0200
+++ gcc/testsuite/g++.dg/cpp0x/pr70869.C2016-07-01 14:45:09.0 
+0200
@@ -0,0 +1,25 @@
+// PR c++/70869
+// { dg-do run { target c++11 } }
+
+#include 
+
+struct A
+{
+  int f () { return 1; }
+  int g () { return 2; }
+  int h () { return 3; }
+};
+
+int
+main ()
+{
+  int cnt = 0;
+  for (const auto  : { ::f, ::g, ::h })
+{
+  A a;
+  if ((a.*m) () != ++cnt)
+   __builtin_abort ();
+}
+  if (cnt != 3)
+__builtin_abort ();
+}
--- gcc/testsuite/g++.dg/cpp0x/pr71054.C.jj 2016-07-01 14:53:31.650154643 
+0200
+++ gcc/testsuite/g++.dg/cpp0x/pr71054.C2016-07-01 14:53:25.0 
+0200
@@ -0,0 +1,21 @@
+// PR c++/71054
+// { dg-do compile { target c++11 } }
+
+#include 
+
+template 
+struct S
+{
+  struct A
+  {
+int a;
+int b;
+T p;
+  };
+  S () { std::initializer_list a{ {0, 0, ::V} }; }
+};
+struct R {
+  void V (int);
+  void U (int);
+};
+S b;

Jakub


[gomp4] backport fortran FE error handling changes

2016-07-01 Thread Cesar Philippidis
I've applied this patch to gomp-4_0-branch which backports the recent
error handling changes I made to the fortran FE. The discussion for the
original patch for trunk can be found in this thread
.
2016-07-01  Cesar Philippidis  

	Back port from trunk
	2016-06-29  Cesar Philippidis  

	gcc/fortran/
	* openmp.c (match_oacc_clause_gang): Rename to ...
	(match_oacc_clause_gwv): this.  Add support for OpenACC worker and
	vector clauses.
	(gfc_match_omp_clauses): Use match_oacc_clause_gwv for
	OMP_CLAUSE_{GANG,WORKER,VECTOR}.  Propagate any MATCH_ERRORs for
	invalid OMP_CLAUSE_{ASYNC,WAIT,GANG,WORKER,VECTOR} clauses.
	(gfc_match_oacc_wait): Propagate MATCH_ERROR for invalid
	oacc_expr_lists.  Adjust the first and needs_space arguments to
	gfc_match_omp_clauses.

	gcc/testsuite/
	* gfortran.dg/goacc/asyncwait-2.f95: Updated expected diagnostics.
	* gfortran.dg/goacc/asyncwait-3.f95: Likewise.
	* gfortran.dg/goacc/asyncwait-4.f95: Add test coverage.

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index cc4583d..8071da0 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -397,43 +397,67 @@ cleanup:
 }
 
 static match
-match_oacc_clause_gang (gfc_omp_clauses *cp)
+match_oacc_clause_gwv (gfc_omp_clauses *cp, unsigned gwv)
 {
   match ret = MATCH_YES;
 
   if (gfc_match (" ( ") != MATCH_YES)
 return MATCH_NO;
 
-  /* The gang clause accepts two optional arguments, num and static.
- The num argument may either be explicit (num: ) or
- implicit without ( without num:).  */
-
-  while (ret == MATCH_YES)
+  if (gwv == GOMP_DIM_GANG)
 {
-  if (gfc_match (" static :") == MATCH_YES)
+/* The gang clause accepts two optional arguments, num and static.
+	 The num argument may either be explicit (num: ) or
+	 implicit without ( without num:).  */
+
+  while (ret == MATCH_YES)
 	{
-	  if (cp->gang_static)
-	return MATCH_ERROR;
+	  if (gfc_match (" static :") == MATCH_YES)
+	{
+	  if (cp->gang_static)
+		return MATCH_ERROR;
+	  else
+		cp->gang_static = true;
+	  if (gfc_match_char ('*') == MATCH_YES)
+		cp->gang_static_expr = NULL;
+	  else if (gfc_match (" %e ", >gang_static_expr) != MATCH_YES)
+		return MATCH_ERROR;
+	}
 	  else
-	cp->gang_static = true;
-	  if (gfc_match_char ('*') == MATCH_YES)
-	cp->gang_static_expr = NULL;
-	  else if (gfc_match (" %e ", >gang_static_expr) != MATCH_YES)
-	return MATCH_ERROR;
-	}
-  else
-	{
-	  /* This is optional.  */
-	  if (cp->gang_num_expr || gfc_match (" num :") == MATCH_ERROR)
-	return MATCH_ERROR;
-	  else if (gfc_match (" %e ", >gang_num_expr) != MATCH_YES)
-	return MATCH_ERROR;
+	{
+	  if (cp->gang_num_expr)
+		return MATCH_ERROR;
+
+	  /* The 'num' argument is optional.  */
+	  gfc_match (" num :");
+
+	  if (gfc_match (" %e ", >gang_num_expr) != MATCH_YES)
+		return MATCH_ERROR;
+	}
+
+	  ret = gfc_match (" , ");
 	}
+}
+  else if (gwv == GOMP_DIM_WORKER)
+{
+  /* The 'num' argument is optional.  */
+  gfc_match (" num :");
+
+  if (gfc_match (" %e ", >worker_expr) != MATCH_YES)
+	return MATCH_ERROR;
+}
+  else if (gwv == GOMP_DIM_VECTOR)
+{
+  /* The 'length' argument is optional.  */
+  gfc_match (" length :");
 
-  ret = gfc_match (" , ");
+  if (gfc_match (" %e ", >vector_expr) != MATCH_YES)
+	return MATCH_ERROR;
 }
+  else
+gfc_fatal_error ("Unexpected OpenACC parallelism.");
 
-  return gfc_match (" ) ");
+  return gfc_match (" )");
 }
 
 static match
@@ -683,14 +707,20 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask,
 	  && gfc_match ("async") == MATCH_YES)
 	{
 	  c->async = true;
-	  needs_space = false;
-	  if (gfc_match (" ( %e )", >async_expr) != MATCH_YES)
+	  match m = gfc_match (" ( %e )", >async_expr);
+	  if (m == MATCH_ERROR)
+		{
+		  gfc_current_locus = old_loc;
+		  break;
+		}
+	  else if (m == MATCH_NO)
 		{
 		  c->async_expr
 		= gfc_get_constant_expr (BT_INTEGER,
 	 gfc_default_integer_kind,
 	 _current_locus);
 		  mpz_set_si (c->async_expr->value.integer, GOMP_ASYNC_NOVAL);
+		  needs_space = true;
 		}
 	  continue;
 	}
@@ -944,9 +974,13 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask,
 	  && gfc_match ("gang") == MATCH_YES)
 	{
 	  c->gang = true;
-	  if (match_oacc_clause_gang(c) == MATCH_YES)
-		needs_space = false;
-	  else
+	  match m = match_oacc_clause_gwv (c, GOMP_DIM_GANG);
+	  if (m == MATCH_ERROR)
+		{
+		  gfc_current_locus = old_loc;
+		  break;
+		}
+	  else if (m == MATCH_NO)
 		needs_space = true;
 	  continue;
 	}
@@ -1383,10 +1417,13 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask,
 	  && gfc_match ("vector") == MATCH_YES)
 	{
 	  c->vector = true;
-	  if (gfc_match (" ( length : %e )", 

Re: [PATCH] fix interaction of -S and -x {c,c++}-header

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 10:22 AM, Jan Beulich wrote:

Irrespective of the use of -o this so far resulted in "error: output
filename specified twice", since cc1_options already produces a -o
option when -S was specified.

gcc/
2016-07-01  Jan Beulich  

* varasm.c (get_variable_section): Validate initializer in
named .bss-like sections.


Ok with the right ChangeLog.

Bonus points if you come up with a doc patch that says what these 
-header arguments are actually supposed to achieve.



Bernd


[committed] Re: A gfortran silent "wrong code" bug in the transition from 4.9.0 -> 4.9.1, using OpenMP. (PR fortran/71717)

2016-07-01 Thread Jakub Jelinek
On Thu, Jun 30, 2016 at 10:00:23PM +0200, Toon Moene wrote:
> On 06/30/2016 08:43 PM, Jakub Jelinek wrote:
> 
> >On Thu, Jun 30, 2016 at 07:33:53PM +0200, Toon Moene wrote:
> 
> >>A colleague of mine at Meteo France, Toulouse, managed to reduce a problem
> >>he had with our common weather forecasting code when using OpenMP down to
> >>the attached code and the transition from 4.9.0 -> 4.9.1.
> >>
> >>In 4.9.1 OpenMP 4.0 was introduced. That is of course a big hammer to start
> >>looking for the culprit, but you are the best person to go to on this code.
> >
> >It is a bug, please file the PR.
> 
> It is bugzilla number 71717 (see
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71717)
> 
> Thanks for looking into this.

Fixed thusly, committed to trunk so far after bootstrap/regtest on
x86_64-linux and i686-linux, queued for backporting.

2016-07-01  Jakub Jelinek  

PR fortran/71717
* trans-openmp.c (gfc_omp_privatize_by_reference): Return false
for GFC_DECL_ASSOCIATE_VAR_P with POINTER_TYPE.

* testsuite/libgomp.fortran/associate3.f90: New test.

--- gcc/fortran/trans-openmp.c.jj   2016-06-30 19:39:19.0 +0200
+++ gcc/fortran/trans-openmp.c  2016-07-01 12:57:22.960295589 +0200
@@ -61,6 +61,7 @@ gfc_omp_privatize_by_reference (const_tr
   if (GFC_DECL_GET_SCALAR_POINTER (decl)
  || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
  || GFC_DECL_CRAY_POINTEE (decl)
+ || GFC_DECL_ASSOCIATE_VAR_P (decl)
  || VOID_TYPE_P (TREE_TYPE (TREE_TYPE (decl
return false;
 
--- libgomp/testsuite/libgomp.fortran/associate3.f90.jj 2016-07-01 
13:36:26.208044233 +0200
+++ libgomp/testsuite/libgomp.fortran/associate3.f902016-07-01 
13:45:07.274602305 +0200
@@ -0,0 +1,20 @@
+! PR fortran/71717
+! { dg-do run }
+
+  type t
+real, allocatable :: f(:)
+  end type
+  type (t) :: v
+  integer :: i, j
+  allocate (v%f(4))
+  v%f = 19.
+  i = 5
+  associate (u => v, k => i)
+  !$omp parallel do
+  do j = 1, 4
+u%f(j) = 21.
+if (j.eq.1) k = 7
+  end do
+  end associate
+  if (any (v%f(:).ne.21.) .or. i.ne.7) call abort
+end


Jakub


[PATCH] Fix hangs with long double atomics (PR middle-end/71716)

2016-07-01 Thread Jakub Jelinek
Hi!

As mentioned in the PR, atomics on types that contain some inner padding
(e.g. long double type on x86_64) have various issues.
Before my ATOMIC_COMPARE_EXCHANGE internal-fn addition, unless the user
uses atomic_compare_exchange* APIs and changes (e.g. copies elsewhere) the
expected variable, what can worst case happen is that atomic_load (done
using integral type covering all the bits) reads a value, then it is VCEd to
long double and stored as long double into the "expected" variable and
during this store, we can end up with random padding bits in the "expected"
variable and the first CAS can fail because of that.  But, with the RTL
store of the new "expected" value which is done using the integral type
the padding bits will have all proper values in the second iteration
already.
While with ATOMIC_COMPARE_EXCHANGE, the VIEW_CONVERT_EXPRs are visible in
GIMPLE and it is possible that all iterations have some issues in the
padding bits.

Thus, the following patch just disables the optimization for floating point
types (float/double perhaps might be fine, but I'm worried about sNaNs),
and also any other types that can contain some padding.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-07-01  Jakub Jelinek  

PR middle-end/71716
* gimple-fold.c (optimize_atomic_compare_exchange_p): Return false
for SCALAR_FLOAT_TYPE_P type of expected var, or if TYPE_PRECISION
is different from mode's bitsize.  Small cleanup.

--- gcc/gimple-fold.c.jj2016-06-28 10:26:52.0 +0200
+++ gcc/gimple-fold.c   2016-07-01 11:29:57.700252754 +0200
@@ -2984,12 +2984,19 @@ optimize_atomic_compare_exchange_p (gimp
 
   tree expected = gimple_call_arg (stmt, 1);
   if (TREE_CODE (expected) != ADDR_EXPR
-  || !SSA_VAR_P (TREE_OPERAND (expected, 0))
-  || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
+  || !SSA_VAR_P (TREE_OPERAND (expected, 0)))
+return false;
+
+  tree etype = TREE_TYPE (TREE_OPERAND (expected, 0));
+  if (!is_gimple_reg_type (etype)
   || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
-  || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
-  || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
-  || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
+  || TREE_THIS_VOLATILE (etype)
+  || VECTOR_TYPE_P (etype)
+  || TREE_CODE (etype) == COMPLEX_TYPE
+  /* Don't optimize floating point expected vars, VIEW_CONVERT_EXPRs
+might not preserve all the bits.  See PR71716.  */
+  || SCALAR_FLOAT_TYPE_P (etype)
+  || TYPE_PRECISION (etype) != GET_MODE_BITSIZE (TYPE_MODE (etype)))
 return false;
 
   tree weak = gimple_call_arg (stmt, 3);
@@ -3005,8 +3012,7 @@ optimize_atomic_compare_exchange_p (gimp
   && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
 return false;
 
-  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
-  != GET_MODE_SIZE (mode))
+  if (int_size_in_bytes (etype) != GET_MODE_SIZE (mode))
 return false;
 
   return true;

Jakub


[RFC] Change order of Fortran BLOCK_VARS (PR fortran/71687, take 2)

2016-07-01 Thread Jakub Jelinek
On Thu, Jun 30, 2016 at 08:06:54PM +0200, Jakub Jelinek wrote:
> So, is it intentional that pushdecl / getdecls acts like a LIFO?
> Though, it seems user vars are pushdecled in the reverse order of
> declarations, but gfc_add_decl_to_function is called in the user declared
> order, so perhaps for the following patch we'd also need to
> decl = nreverse (saved_function_decls);
> in gfc_generate_function_code and similarly in gfc_process_block_locals
> decl = nreverse (saved_local_decls);

Here is an updated patch with the above mentioned two calls to nreverse
added.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-07-01  Jakub Jelinek  

PR fortran/71687
* f95-lang.c (struct binding_level): Add reversed field.
(clear_binding_level): Adjust initializer.
(getdecls): If reversed is clear, set it and nreverse the names
chain before returning it.
(poplevel): Use getdecls.
* trans-decl.c (gfc_generate_function_code, gfc_process_block_locals):
Use nreverse to pushdecl decls in the declaration order.

* gfortran.dg/gomp/pr71687.f90: New test.

--- gcc/fortran/f95-lang.c.jj   2016-06-30 19:38:38.296737997 +0200
+++ gcc/fortran/f95-lang.c  2016-07-01 12:12:24.821306073 +0200
@@ -286,6 +286,9 @@ binding_level {
   tree blocks;
   /* The binding level containing this one (the enclosing binding level).  */
   struct binding_level *level_chain;
+  /* True if nreverse has been already called on names; if false, names
+ are ordered from newest declaration to oldest one.  */
+  bool reversed;
 };
 
 /* The binding level currently in effect.  */
@@ -296,7 +299,7 @@ static GTY(()) struct binding_level *cur
 static GTY(()) struct binding_level *global_binding_level;
 
 /* Binding level structures are initialized by copying this one.  */
-static struct binding_level clear_binding_level = { NULL, NULL, NULL };
+static struct binding_level clear_binding_level = { NULL, NULL, NULL, false };
 
 
 /* Return true if we are in the global binding level.  */
@@ -310,6 +313,11 @@ global_bindings_p (void)
 tree
 getdecls (void)
 {
+  if (!current_binding_level->reversed)
+{
+  current_binding_level->reversed = true;
+  current_binding_level->names = nreverse (current_binding_level->names);
+}
   return current_binding_level->names;
 }
 
@@ -347,7 +355,7 @@ poplevel (int keep, int functionbody)
  binding level that we are about to exit and which is returned by this
  routine.  */
   tree block_node = NULL_TREE;
-  tree decl_chain = current_binding_level->names;
+  tree decl_chain = getdecls ();
   tree subblock_chain = current_binding_level->blocks;
   tree subblock_node;
 
--- gcc/fortran/trans-decl.c.jj 2016-05-09 10:20:26.0 +0200
+++ gcc/fortran/trans-decl.c2016-07-01 12:25:30.863415372 +0200
@@ -6277,7 +6277,7 @@ gfc_generate_function_code (gfc_namespac
gfc_finish_block ());
 
   /* Add all the decls we created during processing.  */
-  decl = saved_function_decls;
+  decl = nreverse (saved_function_decls);
   while (decl)
 {
   tree next;
@@ -6469,7 +6469,7 @@ gfc_process_block_locals (gfc_namespace*
   if (flag_coarray == GFC_FCOARRAY_LIB && has_coarray_vars)
 generate_coarray_init (ns);
 
-  decl = saved_local_decls;
+  decl = nreverse (saved_local_decls);
   while (decl)
 {
   tree next;
--- gcc/testsuite/gfortran.dg/gomp/pr71687.f90.jj   2016-07-01 
12:12:24.821306073 +0200
+++ gcc/testsuite/gfortran.dg/gomp/pr71687.f90  2016-07-01 12:12:24.821306073 
+0200
@@ -0,0 +1,11 @@
+! PR fortran/71687
+! { dg-do compile }
+! { dg-additional-options "-fstack-arrays -O2" }
+
+subroutine s (n, x)
+   integer :: n
+   real :: x(n)
+!$omp parallel
+   x(1:n) = x(n:1:-1)
+!$omp end parallel
+end

Jakub


[PATCH] error on missing LTO symbols

2016-07-01 Thread Cesar Philippidis
Both OpenMP and OpenACC allow the user to call functions and access
global variables. Usually, the user needs to explicitly mark those
functions and variables as offloadable. However, for certain functions,
such as those in libc, libm, etc., it makes sense to allow the user call
functions and use variables that haven't been marked as offloadable.
This patch teaches the lto streamer how to treat missing symbols as
errors instead of assertion failures.

Is this patch OK for trunk and gcc6? Or should we keep the assertion
failure when -fopenacc, -fopenmp, or -fopenmp-simd isn't set?

This patch was originally posted last year:
. I was trying
to avoid it for OpenACC, but making non-acc routine calls errors would
complicate library functions.

Cesar
2016-07-01  Cesar Philippidis  

	gcc/
	* lto-cgraph.c (input_overwrite_node): Change the assertion to an
	error for missing symbols.
	(input_varpool_node): Likewise.


diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 5cef2ba..552ea6b 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -1201,9 +1201,11 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
  LDPR_NUM_KNOWN);
   node->instrumentation_clone = bp_unpack_value (bp, 1);
   node->split_part = bp_unpack_value (bp, 1);
-  gcc_assert (flag_ltrans
-	  || (!node->in_other_partition
-		  && !node->used_from_other_partition));
+
+  int success = flag_ltrans || (!node->in_other_partition
+&& !node->used_from_other_partition);
+  if (!success)
+error ("Missing %<%s%>", node->name ());
 }
 
 /* Return string alias is alias of.  */
@@ -1416,9 +1418,11 @@ input_varpool_node (struct lto_file_decl_data *file_data,
 node->set_section_for_node (section);
   node->resolution = streamer_read_enum (ib, ld_plugin_symbol_resolution,
 	LDPR_NUM_KNOWN);
-  gcc_assert (flag_ltrans
-	  || (!node->in_other_partition
-		  && !node->used_from_other_partition));
+
+  int success = flag_ltrans || (!node->in_other_partition
+&& !node->used_from_other_partition);
+  if (!success)
+error ("Missing %<%s%>", node->name ());
 
   return node;
 }


[PATCH] S/390: Add support for z13 instructions lochi and locghi.

2016-07-01 Thread Dominik Vogt
The attached patch adds patterns to make use of the z13 LOCHI and
LOCGHI instructions.

Tested on s390x biarch and s390, regression tested on s390x.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
gcc/ChangeLog

* config/s390/s390.md: Add "z13" cpu_facility.
("*movcc"): Add support for z13 instructions lochi and locghi.
* config/s390/predicates.md ("loc_operand"): New predicate for "load on
condition" type instructions.
gcc/testsuite/ChangeLog

* gcc.target/s390/vector/vec-scalar-cmp-1.c: Expect lochi instead of
locr.
* gcc.target/s390/loc-2.c: New test.
* gcc.target/s390/loc-1.c: New test.
>From 5136f74f61dc3df0229d43d5f13017280838a011 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Wed, 25 May 2016 11:47:00 +0100
Subject: [PATCH] S/390: Add support for z13 instructions lochi and locghi.

---
 gcc/config/s390/predicates.md  |  7 +++
 gcc/config/s390/s390.md| 24 ++
 gcc/testsuite/gcc.target/s390/loc-1.c  | 20 ++
 gcc/testsuite/gcc.target/s390/loc-2.c  | 20 ++
 .../gcc.target/s390/vector/vec-scalar-cmp-1.c  |  4 ++--
 5 files changed, 65 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/loc-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/loc-2.c

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index e66f4a4..75e4cb8 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -182,6 +182,13 @@
   return s390_contiguous_bitmask_p (INTVAL (op), GET_MODE_BITSIZE (mode), NULL, NULL);
 })
 
+;; Return true if OP is ligitimate for any LOC instruction.
+
+(define_predicate "loc_operand"
+  (ior (match_operand 0 "nonimmediate_operand")
+  (and (match_code "const_int")
+	   (match_test "INTVAL (op) <= 32767 && INTVAL (op) >= -32768"
+
 ;; operators --
 
 ;; Return nonzero if OP is a valid comparison operator
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index f8c61a8..6d8d041 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -483,7 +483,7 @@
   (const (symbol_ref "s390_tune_attr")))
 
 (define_attr "cpu_facility"
-  "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vec"
+  "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vec,z13"
   (const_string "standard"))
 
 (define_attr "enabled" ""
@@ -528,7 +528,12 @@
 
  (and (eq_attr "cpu_facility" "vec")
   (match_test "TARGET_VX"))
-	 (const_int 1)]
+	 (const_int 1)
+
+ (and (eq_attr "cpu_facility" "z13")
+  (match_test "TARGET_Z13"))
+	 (const_int 1)
+	 ]
 	(const_int 0)))
 
 ;; Pipeline description for z900.  For lack of anything better,
@@ -6309,21 +6314,23 @@
  XEXP (operands[1], 1));
 })
 
-; locr, loc, stoc, locgr, locg, stocg
+; locr, loc, stoc, locgr, locg, stocg, lochi, locghi
 (define_insn_and_split "*movcc"
-  [(set (match_operand:GPR 0 "nonimmediate_operand"   "=d,d,d,d,S,S,")
+  [(set (match_operand:GPR 0 "nonimmediate_operand"   "=d,d,d,d,d,d,S,S,")
 	(if_then_else:GPR
 	  (match_operator 1 "s390_comparison"
-	[(match_operand 2 "cc_reg_operand"" c,c,c,c,c,c,c")
+	[(match_operand 2 "cc_reg_operand"" c,c,c,c,c,c,c,c,c")
 	 (match_operand 5 "const_int_operand" "")])
-	  (match_operand:GPR 3 "nonimmediate_operand" " d,0,S,0,d,0,S")
-	  (match_operand:GPR 4 "nonimmediate_operand" " 0,d,0,S,0,d,S")))]
+	  (match_operand:GPR 3 "loc_operand" " d,0,S,0,K,0,d,0,S")
+	  (match_operand:GPR 4 "loc_operand" " 0,d,0,S,0,K,0,d,S")))]
   "TARGET_Z196"
   "@
locr%C1\t%0,%3
locr%D1\t%0,%4
loc%C1\t%0,%3
loc%D1\t%0,%4
+   lochi%C1\t%0,%h3
+   lochi%D1\t%0,%h4
stoc%C1\t%3,%0
stoc%D1\t%4,%0
#"
@@ -6340,7 +6347,8 @@
 	 (match_dup 0)
 	 (match_dup 4)))]
   ""
-  [(set_attr "op_type" "RRF,RRF,RSY,RSY,RSY,RSY,*")])
+  [(set_attr "op_type" "RRF,RRF,RSY,RSY,RIE,RIE,RSY,RSY,*")
+   (set_attr "cpu_facility" "*,*,*,*,z13,z13,*,*,*")])
 
 ;;
 ;;- Multiply instructions.
diff --git a/gcc/testsuite/gcc.target/s390/loc-1.c b/gcc/testsuite/gcc.target/s390/loc-1.c
new file mode 100644
index 000..69f9c04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/loc-1.c
@@ -0,0 +1,20 @@
+/* Test load on condition patterns.  */
+
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O3 -march=z13 -mzarch -fno-asynchronous-unwind-tables" } */
+
+unsigned long locgr (unsigned long rc, unsigned long cond, unsigned long val)
+{
+  if (cond)
+rc = val;
+  return rc;
+}
+/* { dg-final { scan-assembler "locgr:\n\tltgr\t%r3,%r3\n\tlocgrne\t%r2,%r4\n\tbr\t%r14\n" } } */
+
+long locghi (long rc, long cond)
+{
+  if (cond)
+rc = (long)-1;
+  return rc;
+}
+/* { dg-final { scan-assembler 

[PATCH v2] extend shift count warnings to vector types

2016-07-01 Thread Jan Beulich
gcc/c/
2016-07-01  Jan Beulich  

* c-fold.c (c_fully_fold_internal): Also emit shift count
warnings for vector types.
* c-typeck.c (build_binary_op): Likewise.

gcc/testsuite/
2016-07-01  Jan Beulich  

* gcc.dg/vshift-6.c, gcc.dg/vshift-7.c: New.

--- 2016-06-30/gcc/c/c-fold.c
+++ 2016-06-30/gcc/c/c-fold.c
@@ -320,8 +320,6 @@ c_fully_fold_internal (tree expr, bool i
   if ((code == LSHIFT_EXPR || code == RSHIFT_EXPR)
  && TREE_CODE (orig_op1) != INTEGER_CST
  && TREE_CODE (op1) == INTEGER_CST
- && (TREE_CODE (TREE_TYPE (orig_op0)) == INTEGER_TYPE
- || TREE_CODE (TREE_TYPE (orig_op0)) == FIXED_POINT_TYPE)
  && TREE_CODE (TREE_TYPE (orig_op1)) == INTEGER_TYPE
  && c_inhibit_evaluation_warnings == 0)
{
@@ -330,13 +328,23 @@ c_fully_fold_internal (tree expr, bool i
(code == LSHIFT_EXPR
 ? G_("left shift count is negative")
 : G_("right shift count is negative")));
- else if (compare_tree_int (op1,
-TYPE_PRECISION (TREE_TYPE (orig_op0)))
-  >= 0)
+ else if ((TREE_CODE (TREE_TYPE (orig_op0)) == INTEGER_TYPE
+   || TREE_CODE (TREE_TYPE (orig_op0)) == FIXED_POINT_TYPE)
+  && compare_tree_int (op1,
+   TYPE_PRECISION (TREE_TYPE (orig_op0)))
+ >= 0)
warning_at (loc, OPT_Wshift_count_overflow,
(code == LSHIFT_EXPR
 ? G_("left shift count >= width of type")
 : G_("right shift count >= width of type")));
+ else if (TREE_CODE (TREE_TYPE (orig_op0)) == VECTOR_TYPE
+  && compare_tree_int (op1,
+   TYPE_PRECISION (TREE_TYPE (TREE_TYPE 
(orig_op0
+ >= 0)
+   warning_at (loc, OPT_Wshift_count_overflow,
+   code == LSHIFT_EXPR
+   ? G_("left shift count >= width of vector element")
+   : G_("right shift count >= width of vector element"));
}
   if (code == LSHIFT_EXPR
  /* If either OP0 has been folded to INTEGER_CST...  */
--- 2016-06-30/gcc/c/c-typeck.c
+++ 2016-06-30/gcc/c/c-typeck.c
@@ -10985,21 +10985,16 @@ build_binary_op (location_t location, en
 Also set SHORT_SHIFT if shifting rightward.  */
 
 case RSHIFT_EXPR:
-  if (code0 == VECTOR_TYPE && code1 == INTEGER_TYPE
-  && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)
-{
-  result_type = type0;
-  converted = 1;
-}
-  else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE
-  && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE
-  && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE
-  && TYPE_VECTOR_SUBPARTS (type0) == TYPE_VECTOR_SUBPARTS (type1))
+  if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE
+ && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE
+ && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE
+ && TYPE_VECTOR_SUBPARTS (type0) == TYPE_VECTOR_SUBPARTS (type1))
{
  result_type = type0;
  converted = 1;
}
-  else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE)
+  else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE
+   || code0 == VECTOR_TYPE)
   && code1 == INTEGER_TYPE)
{
  doing_shift = true;
@@ -11012,6 +11007,18 @@ build_binary_op (location_t location, en
warning_at (location, OPT_Wshift_count_negative,
"right shift count is negative");
}
+ else if (code0 == VECTOR_TYPE)
+   {
+ if (compare_tree_int (op1,
+   TYPE_PRECISION (TREE_TYPE (type0)))
+ >= 0)
+   {
+ int_const = false;
+ if (c_inhibit_evaluation_warnings == 0)
+   warning_at (location, OPT_Wshift_count_overflow,
+   "right shift count >= width of vector 
element");
+   }
+   }
  else
{
  if (!integer_zerop (op1))
@@ -11035,21 +11042,16 @@ build_binary_op (location_t location, en
   break;
 
 case LSHIFT_EXPR:
-  if (code0 == VECTOR_TYPE && code1 == INTEGER_TYPE
-  && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)
-{
-  result_type = type0;
-  converted = 1;
-}
-  else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE
-  && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE
-  && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE
-  && TYPE_VECTOR_SUBPARTS (type0) == 

Re: [PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 03:57 PM, Jan Beulich wrote:

Do I need to re-submit, or can I take the above as approved-with-
that-change?


Ok with that change.


Bernd


Re: [PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Jan Beulich
>>> On 01.07.16 at 15:44,  wrote:
> On 07/01/2016 03:42 PM, Jan Beulich wrote:
> On 01.07.16 at 15:36,  wrote:
> 
>>> Looks ok, except why the empty dg-options string in the testcase?
>>
>> Because I've seen in it that way in various other test cases (and
>> yes, yet others don't have it). I had to decide for one of the
>> variants, and if it's not required (nor wanted) I can certainly drop
>> it.
> 
> Yes, if there's no real reason, drop it.

Do I need to re-submit, or can I take the above as approved-with-
that-change?

Jan



Re: [PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 03:42 PM, Jan Beulich wrote:

On 01.07.16 at 15:36,  wrote:



Looks ok, except why the empty dg-options string in the testcase?


Because I've seen in it that way in various other test cases (and
yes, yet others don't have it). I had to decide for one of the
variants, and if it's not required (nor wanted) I can certainly drop
it.


Yes, if there's no real reason, drop it.


Bernd


Re: [PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Jan Beulich
>>> On 01.07.16 at 15:36,  wrote:
> On 07/01/2016 10:21 AM, Jan Beulich wrote:
>> Just like gas, which has recently learned to reject such initializers,
>> gcc shouldn't accept such either.
>> ---
>> v2: Use dg-require-named-sections.
>>
>> gcc/
>> 2016-07-01  Jan Beulich  
>>
>>  * varasm.c (get_variable_section): Validate initializer in
>>  named .bss-like sections.
>>
>> gcc/testsuite/
>> 2016-07-01  Jan Beulich  
>>
>>  * gcc.dg/bss.c: New.
> 
> Looks ok, except why the empty dg-options string in the testcase?

Because I've seen in it that way in various other test cases (and
yes, yet others don't have it). I had to decide for one of the
variants, and if it's not required (nor wanted) I can certainly drop
it.

Jan



Re: [PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2016-07-01 Thread Ramana Radhakrishnan


On 13/10/15 18:01, Andre Vieira wrote:
> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings 
> (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed 
> by ARM under Free BSD license.
> 
> The new aeabi_idiv routine is used to replace the one in 
> libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 
> wrapper. The new routine is under LGPLv3 license.

This is not under LGPLv3 . It is under GPLv3 with the runtime library exception 
license, there's a difference. Assuming your licensing expectation is ok  
read on for more of a review.

> 
> The main advantage of this version is that it can improve the performance of 
> the aeabi_idiv function for Thumb1. This solution will also increase the code 
> size. So it will only be used if __OPTIMIZE_SIZE__ is not defined.
> 
> Make check passed for armv6-m.
> 
> libgcc/ChangeLog:
> 2015-08-10  Hale Wang  
> Andre Vieira  
> 
>   * config/arm/lib1funcs.S: Add new wrapper.
> 
> 0001-integer-division.patch
> 
> 
> From 832a3d6af6f06399f70b5a4ac3727d55960c93b7 Mon Sep 17 00:00:00 2001
> From: Andre Simoes Dias Vieira 
> Date: Fri, 21 Aug 2015 14:23:28 +0100
> Subject: [PATCH] new wrapper idivmod
> 
> ---
>  libgcc/config/arm/lib1funcs.S | 250 
> --
>  1 file changed, 217 insertions(+), 33 deletions(-)
> 
> diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
> index 
> 252efcbd5385cc58a5ce1e48c6816d36a6f4c797..c9e544114590da8cde88382bea0f67206e593816
>  100644
> --- a/libgcc/config/arm/lib1funcs.S
> +++ b/libgcc/config/arm/lib1funcs.S
> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>  #ifdef __ARM_EABI__
>  .macro THUMB_LDIV0 name signed
>  #if defined(__ARM_ARCH_6M__)
> - .ifc \signed, unsigned
> - cmp r0, #0
> - beq 1f
> - mov r0, #0
> - mvn r0, r0  @ 0x
> -1:
> - .else
> - cmp r0, #0
> - beq 2f
> - blt 3f
> +
> + push{r0, lr}
>   mov r0, #0
> - mvn r0, r0
> - lsr r0, r0, #1  @ 0x7fff
> - b   2f
> -3:   mov r0, #0x80
> - lsl r0, r0, #24 @ 0x8000
> -2:
> - .endif
> - push{r0, r1, r2}
> - ldr r0, 4f
> - adr r1, 4f
> - add r0, r1
> - str r0, [sp, #8]
> - @ We know we are not on armv4t, so pop pc is safe.
> - pop {r0, r1, pc}
> - .align  2
> -4:
> - .word   __aeabi_idiv0 - 4b
> + bl  SYM(__aeabi_idiv0)
> + pop {r1, pc}
> +

I'd still retain the comment about pop pc here because there's often a 
misconception of merging armv4t and armv6m code.

>  #elif defined(__thumb2__)
>   .syntax unified
>   .ifc \signed, unsigned
> @@ -945,7 +923,170 @@ LSYM(Lover7):
>   add dividend, work
>.endif
>  LSYM(Lgot_result):
> -.endm
> +.endm
> +
> +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
> +/* If performance is preferred, the following functions are provided.  */
> +

Comment above #if please and also check elsewhere in patch.

> +/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
> +.macro BranchToDiv n, label
> + lsr curbit, dividend, \n
> + cmp curbit, divisor
> + blo \label
> +.endm
> +
> +/* Body of div(n).  Shift the divisor in n bits and compare the divisor
> +   and dividend.  Update the dividend as the substruction result.  */
> +.macro DoDiv n
> + lsr curbit, dividend, \n
> + cmp curbit, divisor
> + bcc 1f
> + lsl curbit, divisor, \n
> + sub dividend, dividend, curbit
> +
> +1:   adc result, result
> +.endm
> +
> +/* The body of division with positive divisor.  Unless the divisor is very
> +   big, shift it up in multiples of four bits, since this is the amount of
> +   unwinding in the main division loop.  Continue shifting until the divisor
> +   is larger than the dividend.  */
> +.macro THUMB1_Div_Positive
> + mov result, #0
> + BranchToDiv #1, LSYM(Lthumb1_div1)
> + BranchToDiv #4, LSYM(Lthumb1_div4)
> + BranchToDiv #8, LSYM(Lthumb1_div8)
> + BranchToDiv #12, LSYM(Lthumb1_div12)
> + BranchToDiv #16, LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_large_positive):
> + mov result, #0xff
> + lsl divisor, divisor, #8
> + rev result, result
> + lsr curbit, dividend, #16
> + cmp curbit, divisor
> + blo 1f
> + asr result, #8
> + lsl divisor, divisor, #8
> + beq LSYM(Ldivbyzero_waypoint)
> +
> +1:   lsr curbit, dividend, #12
> + cmp curbit, divisor
> + blo LSYM(Lthumb1_div12)
> + b   LSYM(Lthumb1_div16)
> +LSYM(Lthumb1_div_loop):
> + lsr divisor, divisor, #8
> +LSYM(Lthumb1_div16):
> + Dodiv   #15
> + Dodiv   #14
> + Dodiv   #13
> + Dodiv   #12
> +LSYM(Lthumb1_div12):
> + 

Re: [PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 10:21 AM, Jan Beulich wrote:

Just like gas, which has recently learned to reject such initializers,
gcc shouldn't accept such either.
---
v2: Use dg-require-named-sections.

gcc/
2016-07-01  Jan Beulich  

* varasm.c (get_variable_section): Validate initializer in
named .bss-like sections.

gcc/testsuite/
2016-07-01  Jan Beulich  

* gcc.dg/bss.c: New.


Looks ok, except why the empty dg-options string in the testcase?


Bernd



Re: [PATCH 0/6] remove some usage of rtx_{insn,expr}_list

2016-07-01 Thread Bernd Schmidt

On 06/21/2016 04:47 PM, Trevor Saunders wrote:

On Mon, Jun 20, 2016 at 06:52:35PM +0200, Bernd Schmidt wrote:



So that's a second more in real time - was the machine very busy at the time
you ran these tests so that these aren't meaningful, or is there a need to
investigate this?


Well, it was on my laptop which was running a web browser and stuff.  I
wasn't aware of it being busy, but it also wasn't a extra stable
machine.  I also noticed a bit of variance within the same configuration
so I'm not terribly concerned, but it is odd.


I ran some tests myself, and while they were somewhat inconclusive, they 
also didn't give me reason to think there's something to worry about. So 
ok for the first four patches.



Bernd


unrecognized non-alpha-numeric characters in asm() constraints

2016-07-01 Thread Jan Beulich
Hello,

is there a reason why in output constraints such gets accepted silently
(by parse_output_constraint()) while parse_input_constraint() issues
an error if it encounters any? The latter behavior seems more forward
compatible to me (such that if any such characters acquire meaning,
one can probe the compiler accordingly).

Jan



Re: Determine more IVs to be non-overflowing

2016-07-01 Thread Jan Hubicka
> > Index: tree-scalar-evolution.c
> > ===
> > --- tree-scalar-evolution.c (revision 237856)
> > +++ tree-scalar-evolution.c (working copy)
> > @@ -280,6 +280,7 @@ along with GCC; see the file COPYING3.
> >  #include "params.h"
> >  #include "tree-ssa-propagate.h"
> >  #include "gimple-fold.h"
> > +#include "print-tree.h"
> 
> Don't see you need this.

Yes, i forgot this from debugging.
> 
> >  static tree analyze_scalar_evolution_1 (struct loop *, tree, tree);
> >  static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
> > @@ -3309,6 +3310,60 @@ scev_reset (void)
> >  }
> >  }
> >  
> > +/* Return true if the IV calculation in TYPE can overflow based on the 
> > knowledge
> > +   of the upper bound on the number of iterations of LOOP, the BASE and 
> > STEP
> > +   of IV.
> > +
> > +   We do not use information whether TYPE can overflow so it is safe to
> > +   use this test even for derived IVs not computed every iteration or
> > +   hypotetical IVs to be inserted into code.  */
> > +
> > +bool
> > +iv_can_overflow_p (struct loop *loop, tree type, tree base, tree step)
> 
> Exporting this is also not necessary?

The reason why I export this is that incrementally I plan to use it in ivopts.
It is constructing its own candidates and also needs to know if they will
overflow or not.

I will drop it for now and include it in incremental patch.
> 
> > +{
> > +  widest_int nit;
> > +  wide_int base_min, base_max, step_min, step_max, type_min, type_max;
> > +  signop sgn = TYPE_SIGN (type);
> > +  signop base_sgn = TYPE_SIGN (TREE_TYPE (base));
> > +  signop step_sgn = TYPE_SIGN (TREE_TYPE (step));
> > +
> > +  if (step == 0)
> > +return false;
> 
> Err - you probably mean
> 
>  if (integer_zerop (step))
> 
> here?  your check is a NULL pointer check ...

Yes, sorry. inteer_zerop (step).  Probably does not matter.

> > +  type_min = wi::min_value (type);
> > +  type_max = wi::max_value (type);
> > +  /* Watch overflow.  */
> > +  if ((widest_int)1 << TYPE_PRECISION (type) < nit)
> > +return true;
> 
> TYPE_PRECISION (type) - 1?  The comment can be improved I think.

For type==char I think TYPE_PRECISION should be 8 and useful nit values are in
range 0...255 (because of the +1 addition bellow). So I think it should be
  if ((widest_int)1 << TYPE_PRECISION (type) < nit)

> 
> > +  if ((widest_int::from (base_max, base_sgn)
> > +   + widest_int::from (step_max, step_sgn) * (nit + 1))
> > +   > widest_int::from (type_max, sgn)
> > +  || (widest_int::from (type_min, sgn)
> > + > (widest_int::from (base_min, base_sgn)
> > ++ widest_int::from (step_min, step_sgn) * (nit + 1
> > +return true;
> 
> and this lacks any comment...  so it decodes to
> 
>  (base_max + step_max * (nit + 1) > type_max)
>  || (type_min > base_min + step_min * (nit + 1))

Yes, it is trying to compute the final value of IV when loop reaches maximal
number of iteration and check that it is in the range of the target type.
> 
> and it basically assumes infinite precision arithmetic for
> the computation.  As mentioned previously for __int128 widest_int
> does _not_ guarantee this so you need to use FIXED_WIDE_INT
> with a precision of WIDE_INT_MAX_PRECISION * 2 (+1?) like VRP does.

With the NIT check I know that all 4 values are representable in a target type.

 3) widest_int.  This representation is an approximation of
 infinite precision math.  However, it is not really infinite
 precision math as in the GMP library.  It is really finite
 precision math where the precision is 4 times the size of the
 largest integer that the target port can represent.

My understanding is that to do the mutiplcation I need two times of the bits of
TYPE_PRECISION (type) (+2 for the sign and addition perhaps) and widest_int has
4 times. If this is not true, perhaps the comment can be made more explicit?

Thanks for the pointer to vrp, now at least I see how the widening/explicit
precision work.

I suppose then the type should be FIXED_WIDE_INT (WIDE_INT_MAX_PRECISION * 2 + 
2)?
> 
> Note as both type_min/max and base_min/max are wide_ints the above
> might be simplified
> 
>   step_max * (nit + 1) > type_max - base_max
> 
> where the subtraction can be carried out in wide_int(?) and you
> should also CSE nit + 1 or transform it further to
> 
>   step_max * nit > type_max - base_max - step_max
> 
> where if I understand correctly, if type_max - base_max - step_max
> underflows we already wrap.

Yes, this look like a good idea and yes, if type_max-base_max-step_max is
negative, we wrap.  I suppose even with some inlining doing manual CSE is not
bad plan.

Thanks, wide_int.h is still bit of black magic to me ;)
Honza
> 
> Thanks,
> Richard.


Re: [PATCH PR c/71699] Handle pointer arithmetic in nonzero tree checks

2016-07-01 Thread Manish Goregaokar
Added a test:

gcc/ChangeLog:
PR c/71699
* fold-const.c (tree_binary_nonzero_warnv_p): Allow
pointer addition to also be considered nonzero.

gcc/testsuite/ChangeLog:
PR c/71699
* c-c++-common/pointer-addition-nonnull.c: New test for
pointer addition.
---
 gcc/fold-const.c|  3 +++
 .../c-c++-common/pointer-addition-nonnull.c | 21 +
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/c-c++-common/pointer-addition-nonnull.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 3b9500d..0d82018 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13199,6 +13199,9 @@ tree_binary_nonzero_warnv_p (enum tree_code code,
   switch (code)
 {
 case POINTER_PLUS_EXPR:
+  return flag_delete_null_pointer_checks
+&& (tree_expr_nonzero_warnv_p (op0, strict_overflow_p)
+|| tree_expr_nonzero_warnv_p (op1, strict_overflow_p));
 case PLUS_EXPR:
   if (ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_UNDEFINED (type))
 {
diff --git a/gcc/testsuite/c-c++-common/pointer-addition-nonnull.c
b/gcc/testsuite/c-c++-common/pointer-addition-nonnull.c
new file mode 100644
index 000..10bc04c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pointer-addition-nonnull.c
@@ -0,0 +1,21 @@
+/* { dg-options "-c -O2 -Wall" } */
+
+char *xstrdup (const char *) __attribute__ ((__returns_nonnull__));
+
+#define PREFIX "some "
+
+int
+main ()
+{
+  char *saveptr;
+  char *name = xstrdup (PREFIX "name");
+
+  char *tail = name + sizeof (PREFIX) - 1;
+
+  if (tail == 0)
+tail = saveptr;
+  while (*tail == ' ')
+++tail;
+
+  return 0;
+}
\ No newline at end of file
-- 
2.8.3

-Manish


On Fri, Jul 1, 2016 at 1:40 PM, Richard Biener
 wrote:
> On Thu, Jun 30, 2016 at 5:14 PM, Marc Glisse  wrote:
>> On Thu, 30 Jun 2016, Richard Biener wrote:
>>
>>> points-to analysis already has the constraint that POINTER_PLUS_EXPR
>>> cannot leave the object op0 points to.  Of course currently nothing uses the
>>> fact whether points-to computes pointed-to as nothing (aka NULL) - so the
>>> argument may be moot.
>>>
>>> Anyway, one of my points to the original patch was that POINTER_PLUS_EXPR
>>> handling should be clearly separate from PLUS_EXPR and that we have
>>> flag_delete_null_pointer_checks to allow targest to declare that 0 is a
>>> valid
>>> object pointer (and thus you can do 4 + -4 and reach NULL).
>>
>>
>> Thanks. So the tricky point is that we are not allowed to transform g into f
>> below:
>>
>> char*f(char*p){return p+4;}
>> char*g(char*p){return (char*)((intptr_t)p+4);}
>>
>> That makes sense and seems much easier to guarantee than I feared, nice.
>>
>> (on the other hand, only RTL is able to simplify (long)p+4-(long)(p+4))
>
> Hmm, yeah - we have some match.pd stuff to handle these kind of cases,
> like p + ((long)p2 - (long)p1)) and also (long)(p + x) - (long)p.
>
> OTOH to handle (long)p + 4 - (long)(p + 4) the only thing we need is to
> transform (long)(p + 4) to (long)p + 4 ... that would simplify things but
> of course we cannot ever undo that canonicalization if the result is
> ever converted back to a pointer.  But maybe we can value-number it
> the same with some tricks... (might be worth to file a bugreport)
>
> Richard.
>
>> --
>> Marc Glisse


Re: [Patch] Disable text mode translation in ada for Cygwin

2016-07-01 Thread JonY
On 7/1/2016 20:00, Arnaud Charlet wrote:
>> 
>> ping2? Is there a dedicated list for ADA patches?
> 
> This list is for submitting patches, which you have done, it is not
> really about pinging for commits, which should preferably be done by
> the submitter, after proper testing.
> 
> I do not have a setup to test cygwin changes, so cannot do it for
> you.
> 

I have already tested it on Cygwin before submitting the test.



signature.asc
Description: OpenPGP digital signature


Re: [PATCH GCC]Resolve compilation time known alias checks in vectorizer

2016-07-01 Thread Richard Biener
On Tue, Jun 28, 2016 at 8:18 AM, Bin.Cheng  wrote:
> On Tue, Jun 14, 2016 at 1:57 PM, Richard Biener
>  wrote:
>> On Mon, Jun 13, 2016 at 12:01 PM, Bin Cheng  wrote:
>>> Hi,
>>> GCC vectorizer generates many unnecessary runtime alias checks known at 
>>> compilation time.  For some data-reference pairs, alias relation can be 
>>> resolved at compilation time, in this case, we can simply skip the alias 
>>> check.  For some other data-reference pairs, alias relation can be realized 
>>> at compilation time, in this case, we should return false and simply skip 
>>> vectorizing the loop.  For the second case, the corresponding loop is 
>>> vectorized for nothing because the guard alias condition is bound to false 
>>> anyway.  Vectorizing it not only wastes compilation time, but also slows 
>>> down generated code because GCC fails to resolve these "false" alias check 
>>> after vectorization.  Even in cases it can be resolved (by VRP), GCC fails 
>>> to cleanup all the mess generated in loop versioning.
>>> This looks like a common issue in spec2k6.  For example, in 
>>> 434.zeusmp/ggen.f, there are three loops vectorized but never executed; in 
>>> 464.h264ref, there are loops in which all runtime alias checks are resolved 
>>> at compilation time thus loop versioning is proven unnecessary.  Statistic 
>>> data also shows that about >100 loops are falsely vectorized currently in 
>>> my build of spec2k6.
>>>
>>> This patch is based on 
>>> https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00399.html, bootstrap and 
>>> test on x86_64 and AArch64 (ongoing), is it OK?
>>
>> This is PR71060 and PR65206?
>>
>> Rather than patching up vect_prune_runtime_alias_test_list to handle
>> runtime known _not_ aliasing (does that happen?  for one of the
>> testcases?) I'd patch vect_analyze_data_ref_dependence for that case.
>> Yes, we can have cases where the runtime check generated
>> is imprecise enough that it is proved to always alias, thus handling
>> these cases seems fine (in which case handling the other is
>> trivial).
>>
>> So I'm generally fine with the patch but please check the above PRs
>> and eventually add testcases from them.
> Hi,
> The two PRs you mentioned is the case which is proved to always alias.
> Before this patch, the loop is vectorized, alias check is generated
> and then simplified into false, at last, the versioned loop gets
> deleted.  With this patch, it proves always alias earlier and we won't
> do the useless versioning any more.  And I checked spec, there are
> quite a lot compile time no-alias cases.
>
> Here is the updated patch wrto your comments.  Is it OK?

Ok.

Thanks,
Richard.

> Thanks,
> bin
>
>>
>> +/* Function vect_no_alias_p.
>> +
>> +   Given data references A and B whose alias relation is known at
>>
>> A and B with equal base and offset
>>
>> +   compilation time, return TRUE if they do not alias to each other;
>> +   return FALSE if they do.  SEGMENT_LENGTH_A and SEGMENT_LENGTH_B
>> +   are the memory lengths accessed by A and B respectively.  */
>> +
>> +static bool
>> +vect_no_alias_p (struct data_reference *a, struct data_reference *b,
>> + tree segment_length_a, tree segment_length_b)
>>
>> please don't mix tree and wide_int so much.  Either use wide_int exclusively
>> throughout the function or use tree_int_cst_eq for equality and
>> tree_int_cst_le for the <= compares.
>>
>> +  comp_res = compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS 
>> (dr_b));
>> +  if (comp_res == 0)
>> +   comp_res = compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
>> +
>> +  /* Alias is known at compilation time.  */
>> +  if (comp_res == 0
>> + && TREE_CODE (length_factor) == INTEGER_CST
>> + && TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
>> + && TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST)
>> +   {
>> + if (vect_no_alias_p (dr_a, dr_b, segment_length_a, 
>> segment_length_b))
>> +   continue;
>>
>> I wonder if you'd rather want to check segment_length_a/b for INTEGER_CST
>> (not length_factor).
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> bin
>>>
>>> 2016-06-07  Bin Cheng  
>>>
>>> * tree-vect-data-refs.c (vect_no_alias_p): New function.
>>> (vect_prune_runtime_alias_test_list): Call vect_no_alias_p to
>>> resolve alias checks which are known at compilation time.
>>> Truncate vector LOOP_VINFO_MAY_ALIAS_DDRS(loop_vinfo) if all
>>> alias checks are resolved at compilation time.


Re: [PATCH][vectorizer][2/2] Hook up mult synthesis logic into vectorisation of mult-by-constant

2016-07-01 Thread Richard Biener
On Thu, 30 Jun 2016, Kyrill Tkachov wrote:

> 
> On 28/06/16 08:54, Richard Biener wrote:
> > On Thu, 16 Jun 2016, Kyrill Tkachov wrote:
> > 
> > > On 15/06/16 22:53, Marc Glisse wrote:
> > > > On Wed, 15 Jun 2016, Kyrill Tkachov wrote:
> > > > 
> > > > > This is a respin of
> > > > > https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00952.html following
> > > > > feedback.
> > > > > I've changed the code to cast the operand to an unsigned type before
> > > > > applying the multiplication algorithm
> > > > > and cast it back to the signed type at the end.
> > > > > Whether to perform the cast is now determined by the function
> > > > > cast_mult_synth_to_unsigned in which I've implemented
> > > > > the cases that Marc mentioned in [1]. Please do let me know
> > > > > if there are any other cases that need to be handled.
> > > > Ah, I never meant those cases as an exhaustive list, I was just looking
> > > > for
> > > > examples showing that the transformation was unsafe, and those 2 came to
> > > > mind:
> > > > 
> > > > - x*15 -> x*16-x the second one can overflow even when the first one
> > > > doesn't.
> > > > 
> > > > - x*-2 -> -(x*2) can overflow when the result is INT_MIN (maybe that's
> > > > redundant with the negate_variant check?)
> > > > 
> > > > On the other hand, as long as we remain in the 'positive' operations,
> > > > turning x*3 to x<<1+x seems perfectly safe. And even x*30 to (x*16-x)*2
> > > > cannot cause spurious overflows. But I didn't look at the algorithm
> > > > closely
> > > > enough to characterize the safe cases. Now if you have done it, that's
> > > > good
> > > > :-) Otherwise, we might want to err on the side of caution.
> > > > 
> > > I'll be honest, I didn't give it much thought beyond convincing myself
> > > that
> > > the two cases you listed are legitimate.
> > > Looking at expand_mult_const in expmed.c can be helpful (where it updates
> > > val_so_far for checking purposes) to see
> > > the different algorithm cases. I think the only steps that could cause
> > > overflow are alg_sub_t_m2, alg_sub_t2_m and alg_sub_factor or when the
> > > final
> > > step is negate_variant, which are what you listed (and covered in this
> > > patch).
> > > 
> > > richi is away on PTO for the time being though, so we have some time to
> > > convince ourselves :)
> > ;)  I think the easiest way would be to always use unsigned arithmetic.
> > 
> > While VRP doesn't do anything on vector operations we still have some
> > match.pd patterns that rely on correct overflow behavior and those
> > may be enabled for vector types as well.
> 
> That's fine with me.
> Here's the patch that performs the casts to unsigned and back when the input
> type doesn't wrap on overflow.
> 
> Bootstrapped and tested on arm, aarch64, x86_64.
> 
> How's this?

+static bool
+target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
+tree scaltype)
+{
...
+  tree vectype = get_vectype_for_scalar_type (scaltype);
+
+  if (!vectype)
+return false;
+
+  /* All synthesis algorithms require shifts, so bail out early if
+ target cannot vectorize them.
+ TODO: If the target doesn't support vector shifts but supports 
vector
+ addition we could synthesize shifts that way.  
vect_synth_mult_by_constant
+ would need to be updated to do that.  */
+  if (!vect_supportable_shift (LSHIFT_EXPR, scaltype))
+return false;

I think these tests should be done in the caller before calling
synth_mult (and vectype be passed down accordingly).  Also I wonder
if synth_mult for a * 2 produces a << 1 or a + a - the case of
a * 2 -> a + a was what Marcs patch handled.  Guarding off LSHIFT_EXPR
support that early will make that fail on targets w/o vector shift.

+  bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
+  bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
+
+  if (var == negate_variant
+  && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
+return false;

I think we don't have any targets that support PLUS but not MINUS
or targets that do not support NEGATE.  OTOH double-checking doesn't 
matter.

+apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
+stmt_vec_info stmt_vinfo)
+{
+  if (TREE_INT_CST_LOW (op2) == 0

integer_zerop (op2)

+  && (code == LSHIFT_EXPR
+ || code == PLUS_EXPR))

+  tree itype = TREE_TYPE (op2);

I think it's dangerous to use the type of op2 given you synthesize
shifts as well.  Why not use the type of op1?

+  /* TODO: Targets that don't support vector shifts could synthesize
+them using vector additions.  target_supports_mult_synth_alg 
would
+need to be updated to allow this.  */
+  switch (alg.op[i])
+   {

so I suppose one could at least special-case << 1 and always use
PLUS for that.

Otherwise looks ok to me.  There is a PR with a testcase for
the x * 2 issue so please add that if it is fixed or amend the 

Re: [Patch] Disable text mode translation in ada for Cygwin

2016-07-01 Thread Arnaud Charlet

> Text mode translation should not be done for Cygwin, especially since it
> does not
> support unicode setmode calls. This also fixes ada builds for Cygwin.
> 
> OK for trunk?
 
 OK, thanks.
>>> 
>>> Can someone please commit this? I don't have SVN write access.
>>> 
>>> Thanks.
>> 
>> Ping?
> 
> ping2? Is there a dedicated list for ADA patches?

This list is for submitting patches, which you have done, it is not really 
about pinging for commits, which should preferably be done by the submitter, 
after proper testing.

I do not have a setup to test cygwin changes, so cannot do it for you.

Arno



Re: [Patch] Disable text mode translation in ada for Cygwin

2016-07-01 Thread JonY
On 6/1/2016 18:27, JonY wrote:
> On 5/27/2016 06:25, JonY wrote:
>> On 5/26/2016 21:55, Arnaud Charlet wrote:
 Text mode translation should not be done for Cygwin, especially since it
 does not
 support unicode setmode calls. This also fixes ada builds for Cygwin.

 OK for trunk?
>>>
>>> OK, thanks.
>>>
>>
>> Can someone please commit this? I don't have SVN write access.
>>
>> Thanks.
>>
> 
> Ping?
> 
> 

ping2? Is there a dedicated list for ADA patches?




signature.asc
Description: OpenPGP digital signature


Re: Determine more IVs to be non-overflowing

2016-07-01 Thread Richard Biener
On Thu, 30 Jun 2016, Jan Hubicka wrote:

> Hi,
> i sent the email before, but it seems it never left my machine. Sorry for
> possible duplicates.  This is updated version which does not overflow (by
> capping nit), but still using widest_int for the calculatoins. It seems more
> readable to do so than using wi:add/wi:mult/wi:lt and overflow checks, but
> i can definitly update the patch to do it, too.
> 
> Bootstrapped/regtested x86_64-linux, OK?
> 
> Honza
> 
>   * tree-scalar-evolution.h (iv_can_overflow_p): Declare.
>   * tree-scalar-evolution.c (iv_can_overflow_p): New funcition.
>   (simple_iv): Use it.
>   * tree-ssa-loop-niter.c (nowrap_type_p): Use ANY_INTEGRAL_TYPE_P
> 
>   * gcc.dg/tree-ssa/scev-14.c: New testcase.
> 
> Index: testsuite/gcc.dg/tree-ssa/scev-14.c
> ===
> --- testsuite/gcc.dg/tree-ssa/scev-14.c   (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/scev-14.c   (working copy)
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
> +int a[100];
> +void t(unsigned int n)
> +{
> +  unsigned int i;
> +  for (i=0; i + a[i]++;
> +}
> +/* { dg-final { scan-tree-dump "Overflowness wrto loop niter:
> No-overflow"  "ivopts" } } */
> +/* { dg-final { scan-tree-dump-not "Overflowness wrto loop niter:
> Overflow" "ivopts" } } */
> Index: tree-scalar-evolution.c
> ===
> --- tree-scalar-evolution.c   (revision 237856)
> +++ tree-scalar-evolution.c   (working copy)
> @@ -280,6 +280,7 @@ along with GCC; see the file COPYING3.
>  #include "params.h"
>  #include "tree-ssa-propagate.h"
>  #include "gimple-fold.h"
> +#include "print-tree.h"

Don't see you need this.

>  static tree analyze_scalar_evolution_1 (struct loop *, tree, tree);
>  static tree analyze_scalar_evolution_for_address_of (struct loop *loop,
> @@ -3309,6 +3310,60 @@ scev_reset (void)
>  }
>  }
>  
> +/* Return true if the IV calculation in TYPE can overflow based on the 
> knowledge
> +   of the upper bound on the number of iterations of LOOP, the BASE and STEP
> +   of IV.
> +
> +   We do not use information whether TYPE can overflow so it is safe to
> +   use this test even for derived IVs not computed every iteration or
> +   hypotetical IVs to be inserted into code.  */
> +
> +bool
> +iv_can_overflow_p (struct loop *loop, tree type, tree base, tree step)

Exporting this is also not necessary?

> +{
> +  widest_int nit;
> +  wide_int base_min, base_max, step_min, step_max, type_min, type_max;
> +  signop sgn = TYPE_SIGN (type);
> +  signop base_sgn = TYPE_SIGN (TREE_TYPE (base));
> +  signop step_sgn = TYPE_SIGN (TREE_TYPE (step));
> +
> +  if (step == 0)
> +return false;

Err - you probably mean

 if (integer_zerop (step))

here?  your check is a NULL pointer check ...

> +
> +  if (TREE_CODE (base) == INTEGER_CST)
> +base_min = base_max = base;
> +  else if (TREE_CODE (base) == SSA_NAME && !POINTER_TYPE_P (TREE_TYPE (base))
> +&& get_range_info (base, _min, _max) == VR_RANGE)

split & into separate lines.  Also use && INTEGRAL_TYPE_P rather
than a negative check.  This means that pointer IVs are always
considered to overflow.

> +;
> +  else
> +return true;
> +
> +  if (TREE_CODE (step) == INTEGER_CST)
> +step_min = step_max = step;
> +  else if (TREE_CODE (step) == SSA_NAME && !POINTER_TYPE_P (TREE_TYPE (step))
> +&& get_range_info (step, _min, _max) == VR_RANGE)

Likewise.

> +;
> +  else
> +return true;
> +
> +  if (!get_max_loop_iterations (loop, ))
> +return true;
> +
> +  type_min = wi::min_value (type);
> +  type_max = wi::max_value (type);
> +  /* Watch overflow.  */
> +  if ((widest_int)1 << TYPE_PRECISION (type) < nit)
> +return true;

TYPE_PRECISION (type) - 1?  The comment can be improved I think.

> +  if ((widest_int::from (base_max, base_sgn)
> +   + widest_int::from (step_max, step_sgn) * (nit + 1))
> +   > widest_int::from (type_max, sgn)
> +  || (widest_int::from (type_min, sgn)
> +   > (widest_int::from (base_min, base_sgn)
> +  + widest_int::from (step_min, step_sgn) * (nit + 1
> +return true;

and this lacks any comment...  so it decodes to

 (base_max + step_max * (nit + 1) > type_max)
 || (type_min > base_min + step_min * (nit + 1))

and it basically assumes infinite precision arithmetic for
the computation.  As mentioned previously for __int128 widest_int
does _not_ guarantee this so you need to use FIXED_WIDE_INT
with a precision of WIDE_INT_MAX_PRECISION * 2 (+1?) like VRP does.

Note as both type_min/max and base_min/max are wide_ints the above
might be simplified

  step_max * (nit + 1) > type_max - base_max

where the subtraction can be carried out in wide_int(?) and you
should also CSE nit + 1 or transform it further to

  step_max * nit > type_max - base_max - step_max

where if I 

Re: [PATCH testsuite]XFAIL tests for -funsafe-loop-optimizations

2016-07-01 Thread Richard Biener
On Tue, Jun 28, 2016 at 8:18 AM, Bin Cheng  wrote:
> Hi,
> The previous patch removes support for -funsafe-loop-optimizations on GIMPLE, 
> as a follow up, this one marks related tests as XFAIL for the moment.  I 
> would like to either remove/rewrite the case after -funsafe-loop-optimization 
> is fully dicarded?
> Tests checked.  Is it OK?

Please remove them instead.  Removing -funsafe-loop-optimizations and
all traces of code using it in GCC is ok.

Thanks,
Richard.

> Thanks,
> bin
>
> gcc/testsuite/ChangeLog
> 2016-06-27  Bin Cheng  
>
> * gcc.dg/tree-ssa/pr19210-1.c: xfail unsafe loop opt warnings.
> * gcc.dg/tree-ssa/pr19210-2.c: Ditto.


Re: [PATCH GCC]Improve loop-niter to handle possible infinite loop.

2016-07-01 Thread Richard Biener
On Tue, Jun 28, 2016 at 8:18 AM, Bin Cheng  wrote:
> Hi,
> At the moment, loop niter analyzer depends on simple_iv to understand control 
> induction variable in order to do further niter analysis.  For cases reported 
> in PR57558 (comment #4), the control variable is not an SCEV because it's 
> converted from an smaller type and that type could overflow/wrap.  As a 
> result, niter information for such loops won't be analyzed because 
> number_of_iterations_exit exits immediately once simple_iv fails.  As a 
> matter of fact, niter analyzer can be improved by computing an additional 
> assumption under which the loop won't be infinite (or the corresponding iv 
> doesn't overflow).  This patch does so by changing both scev and niter 
> analyzer.  It introduces a new interface simple_iv_with_niters which is used 
> in niter analyzer.  The new interface returns an IV as well as a possible 
> niter expression, the expression indicates the maximum number the IV can 
> iterate before the returned simple_iv becoming invalid.  Given below example:
>
>   short unsigned int i;
>   int _1;
>
>   :
>   goto ;
>
>   :
>   arr[_1] = -1;
>   i_6 = i_2 + 1;
>
>   :
>   # i_2 = PHI <1(2), i_6(3)>
>   _1 = (int) i_2;
>   if (_1 != 199)
> goto ;
>   else
> goto ;
>
>   :
>   return;
>
> When analyzing variable "_1", the old interface simple_iv returns false, 
> while the new interface returns <{1, 1}_loop, 65534>.  It means "_1" is a 
> valid SCEV as long as it doesn't iterate more than 65534 times.
> This patch also introduces a new interface in niter analyzer 
> number_of_iterations_exit_assumptions.  The new interface further derives 
> assumption from the result of simple_iv_with_niters, and the assumption can 
> be used by other optimizers.  As for this loop, it's an unexpected gain 
> because assumption (198 < 65534) is naturally TRUE.
> For loops that could be infinite, the assumption will be an expression.  This 
> improvement is still good, for example, the next patch to will vectorize such 
> loops with help of this patch.
>
> This patch also changes how assumptions is handled in niter analyzer.  At the 
> moment, (non-true) assumptions are not supported unless 
> -funsafe-loop-optimizations are specified, after this patch, the new 
> interface exposes assumptions to niter user and let them make their own 
> decision.  In effect, this patch discards -funsafe-loop-optimizations on 
> GIMPLE level, which we think is not a very useful option anyway.  Next patch 
> will xfails tests for this option.  Well, the option is not totally discarded 
> because it requires RTL changes too.  I will follow up that after gimple part 
> change.
>
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

Please make simple_iv_with_niters accept NULL as iv_niters and pass
NULL from simple_iv to avoid useless work.

You have chosen to make the flags per loop instead of say, flags on
the global loop state.  The patch itself doesn't set the flag
on any loop thus it doesn't really belong into this patch IMHO, so
maybe you can split it out.  We do already have a plethora
of "flags" (badly packed) in struct loop and while I see the interface
is sth very specific adding another 4 bytes doesn't look
too nice here (but maybe we don't care, struct loop isn't allocated
that much hopefully).  I'd like to see a better comment
before the flags part in cfgloop.h though noting that those are not
flags computed by the compiler but flags that are set
by the consumer that affect semantics of certain (document which)
APIs.  And that consumers are expected to re-set
flags back!  (failing to do that might be a source of hard to track down bugs?)

Anyway, the _assumtions part of the patch is ok with the suggested change.

Thanks,
Richard.

> Thanks,
> bin
>
> 2016-06-27  Bin Cheng  
>
> * cfgloop.h (flags): New field to loop struct.
> (LOOP_F_INFINITE, LOOP_F_ASSUMPTIONS, LOOP_F_MUST_ROLL): New macros.
> (loop_flag_set, loop_flag_clear, loop_flag_set_p): New functions.
> * cfgloop.c (alloc_loop): Handle flags.
> * tree-scalar-evolution.c (simple_iv_with_niters): New funcion.
> (derive_simple_iv_with_niters): New function.
> (simple_iv): Rewrite using simple_iv_with_niters.
> * tree-scalar-evolution.h (simple_iv_with_niters): New decl.
> * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions): New
> function.
> (number_of_iterations_exit): Rewrite using above function.
> * tree-ssa-loop-niter.h (number_of_iterations_exit_assumptions): New
> Decl.
>
> gcc/testsuite/ChangeLog
> 2016-06-27  Bin Cheng  
>
> * gcc.dg/tree-ssa/loop-41.c: New test.


Re: [PATCH] Do not emit SAVE_EXPR for already assigned SSA_NAMEs (PR71606).

2016-07-01 Thread Richard Biener
On Thu, Jun 23, 2016 at 1:14 PM, Eric Botcazou  wrote:
>> The gimplifier has been changed recently to use anonymous SSA_NAMEs instead
>> of temporary decls.
>
> But the PR is a regression present since GCC 4.7...
>
>> And the gimplifier uses save_expr (which is a gimplifier function BTW) on
>> both not gimplified at all as well as partially gimplified trees.
>
> Are you confounding it with something else?  Because save_expr is definitely
> not a gimplifier function, it's mostly used to build GENERIC trees.  That
> being said, I can imagine it being invoked from the gimplifier, but I'd like
> to see the backtrace.

So the issue is that the inliner uses fold_convert:

  if (value
  && value != error_mark_node
  && !useless_type_conversion_p (TREE_TYPE (p), TREE_TYPE (value)))
{
  /* If we can match up types by promotion/demotion do so.  */
  if (fold_convertible_p (TREE_TYPE (p), value))
rhs = fold_convert (TREE_TYPE (p), value);
  else

and converting a _Complex double to a _Complex long double.  That
generates

COMPLEX_EXPR <(long double) REALPART_EXPR >, (long
double) IMAGPART_EXPR >>

which in insert_init_stmt we insert into the IL w/o gimplifying it
(thus the operand
scanner ICEs).

IMHO using fold-convert in this case is bogus and ideally the testcase
should have been diagnosed.

fold_convertible_p has a comment

/* Returns true, if ARG is convertible to TYPE using a NOP_EXPR.  *

but clearly it isn't generating just a NOP_EXPR (or VIEW_CONVERT_EXPR
or other single operation) here.

So that is the thing to fix.  The way we build / insert the init stmts
can also be improved by properly
gimplifying the rhs first but of course that likely runs into the
SAVE_EXPR case you mentioned.

Richard.

> --
> Eric Botcazou


Re: [6/7] Explicitly classify vector loads and stores

2016-07-01 Thread Richard Biener
On Wed, Jun 15, 2016 at 10:52 AM, Richard Sandiford
 wrote:
> This is the main patch in the series.  It adds a new enum and routines
> for classifying a vector load or store implementation.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Why's the setting and checking of the memory access type conditional on !slp?
I'd rather avoid doing this :/

Otherwise it looks like a step in the right direction of splitting the
vectorizable_*
functions into a analysis part that records all decisions made and a transform
part that just applies it.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
> * tree-vectorizer.h (vect_memory_access_type): New enum.
> (_stmt_vec_info): Add a memory_access_type field.
> (STMT_VINFO_MEMORY_ACCESS_TYPE): New macro.
> (vect_model_store_cost): Take an access type instead of a boolean.
> (vect_model_load_cost): Likewise.
> * tree-vect-slp.c (vect_analyze_slp_cost_1): Update calls to
> vect_model_store_cost and vect_model_load_cost.
> * tree-vect-stmts.c (vec_load_store_type): New enum.
> (vect_model_store_cost): Take an access type instead of a
> store_lanes_p boolean.  Simplify tests.
> (vect_model_load_cost): Likewise, but for load_lanes_p.
> (get_group_load_store_type, get_load_store_type): New functions.
> (vectorizable_store): Use get_load_store_type.  Record the access
> type in STMT_VINFO_MEMORY_ACCESS_TYPE.
> (vectorizable_load): Likewise.
> (vectorizable_mask_load_store): Likewise.  Replace is_store
> variable with vls_type.
>
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h
> +++ gcc/tree-vectorizer.h
> @@ -485,6 +485,33 @@ enum slp_vect_type {
>hybrid
>  };
>
> +/* Describes how we're going to vectorize an individual load or store,
> +   or a group of loads or stores.  */
> +enum vect_memory_access_type {
> +  /* A simple contiguous access.  */
> +  VMAT_CONTIGUOUS,
> +
> +  /* A simple contiguous access in which the elements need to be permuted
> + after loading or before storing.  Only used for loop vectorization;
> + SLP uses separate permutes.  */
> +  VMAT_CONTIGUOUS_PERMUTE,
> +
> +  /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES.  */
> +  VMAT_LOAD_STORE_LANES,
> +
> +  /* An access in which each scalar element is loaded or stored
> + individually.  */
> +  VMAT_ELEMENTWISE,
> +
> +  /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
> + SLP accesses.  Each unrolled iteration uses a contiguous load
> + or store for the whole group, but the groups from separate iterations
> + are combined in the same way as for VMAT_ELEMENTWISE.  */
> +  VMAT_STRIDED_SLP,
> +
> +  /* The access uses gather loads or scatter stores.  */
> +  VMAT_GATHER_SCATTER
> +};
>
>  typedef struct data_reference *dr_p;
>
> @@ -602,6 +629,10 @@ typedef struct _stmt_vec_info {
>/* True if this is an access with loop-invariant stride.  */
>bool strided_p;
>
> +  /* Classifies how the load or store is going to be implemented
> + for loop vectorization.  */
> +  vect_memory_access_type memory_access_type;
> +
>/* For both loads and stores.  */
>bool simd_lane_access_p;
>
> @@ -659,6 +690,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
>  #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
>  #define STMT_VINFO_GATHER_SCATTER_P(S)(S)->gather_scatter_p
>  #define STMT_VINFO_STRIDED_P(S)   (S)->strided_p
> +#define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
>  #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
>  #define STMT_VINFO_VEC_REDUCTION_TYPE(S)   (S)->v_reduc_type
>
> @@ -1006,12 +1038,12 @@ extern void free_stmt_vec_info (gimple *stmt);
>  extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
>  stmt_vector_for_cost *,
> stmt_vector_for_cost *);
> -extern void vect_model_store_cost (stmt_vec_info, int, bool,
> +extern void vect_model_store_cost (stmt_vec_info, int, 
> vect_memory_access_type,
>enum vect_def_type, slp_tree,
>stmt_vector_for_cost *,
>stmt_vector_for_cost *);
> -extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree,
> - stmt_vector_for_cost *,
> +extern void vect_model_load_cost (stmt_vec_info, int, 
> vect_memory_access_type,
> + slp_tree, stmt_vector_for_cost *,
>   stmt_vector_for_cost *);
>  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
>   enum vect_cost_for_stmt, stmt_vec_info,
> Index: gcc/tree-vect-slp.c
> 

Re: Problem in cxx_fundamental_alignment_p?

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 10:34 AM, Dodji Seketeli wrote:


The patch below was bootstrapped and tested on x86_64-linux, without
issues,


The patch looks good to me, thanks.


Alright. I think we need a C frontend maintainer or maybe Jason to 
approve it. Attaching again.



but I'm not convinced this code is covered by any testcase.


Hmhm, looking at the test cases, accompanying the initial patch that
introduced that code, I agree.  I guess we should probably add a test case that
uses [[gnu::aligned (val)]], with 'val' being an alignment that is
greater than MAX (TYPE_ALIGN_UNIT (long_long_integer_type_node),
  TYPE_ALIGN_UNIT (long_double_type_node))
in the g++.dg/cpp0x/gen-attrs-*.C series of tests?


I really don't know what this code is even doing, so I'll leave that up 
to you...



Bernd

	* c-common.c (cxx_fundamental_alignment_p): Use TYPE_ALIGN_UNIT,
	not TYPE_ALIGN.

Index: gcc/c-family/c-common.c
===
--- gcc/c-family/c-common.c	(revision 237797)
+++ gcc/c-family/c-common.c	(working copy)
@@ -7668,10 +7668,10 @@ fail:
   return NULL_TREE;
 }
 
-/* Check whether ALIGN is a valid user-specified alignment.  If so,
-   return its base-2 log; if not, output an error and return -1.  If
-   ALLOW_ZERO then 0 is valid and should result in a return of -1 with
-   no error.  */
+/* Check whether ALIGN is a valid user-specified alignment, specified
+   in bytes.  If so, return its base-2 log; if not, output an error
+   and return -1.  If ALLOW_ZERO then 0 is valid and should result in
+   a return of -1 with no error.  */
 int
 check_user_alignment (const_tree align, bool allow_zero)
 {
@@ -12700,9 +12700,9 @@ scalar_to_vector (location_t loc, enum t
   return stv_nothing;
 }
 
-/* Return true iff ALIGN is an integral constant that is a fundamental
-   alignment, as defined by [basic.align] in the c++-11
-   specifications.
+/* Return true iff ALIGN, which is specified in bytes, is an integral
+   constant that is a fundamental alignment, as defined by
+   [basic.align] in the c++-11 specifications.
 
That is:
 
@@ -12712,10 +12712,11 @@ scalar_to_vector (location_t loc, enum t
 alignof(max_align_t)].  */
 
 bool
-cxx_fundamental_alignment_p  (unsigned align)
+cxx_fundamental_alignment_p (unsigned align)
 {
-  return (align <=  MAX (TYPE_ALIGN (long_long_integer_type_node),
-			 TYPE_ALIGN (long_double_type_node)));
+  unsigned limit = MAX (TYPE_ALIGN_UNIT (long_long_integer_type_node),
+			TYPE_ALIGN_UNIT (long_double_type_node));
+  return align <= limit;
 }
 
 /* Return true if T is a pointer to a zero-sized aggregate.  */


Re: Improve insert/emplace robustness to self insertion

2016-07-01 Thread Jonathan Wakely

On 30/06/16 21:51 +0200, François Dumont wrote:

On 29/06/2016 23:30, Jonathan Wakely wrote:


iterator
insert(const_iterator __position, value_type&& __x)
{ return emplace(__position, std::move(__x)); }

That's suboptimal, since in the general case we need an extra
construction for emplacing, but we know that we don't need to do that
when inserting rvalues.


   Why not ? I realized with your remarks that I was missing some 
tests in the new self_insert.cc. The ones to insert an rvalue coming 
from the vector itself. In the attached patch there is those 2 tests, 
do you agree with expected behavior ? For the moment it doesn't check 
that the source value has been indeed moved cause it doesn't, I will 
update it once it does.


No, I don't agree, because this is undefined behaviour:

  vv.insert(vv.begin(), std::move(vv[0]));

We don't need to support that case.

17.6.4.9 [res.on.arguments] says:

— If a function argument binds to an rvalue reference parameter, the
 implementation may assume that this parameter is a unique reference
 to this argument.

i.e. when passed an rvalue we can assume it is not a reference to
something in the container.

That's why we should not perform any more operations when inserting
rvalues than we do now. Any increase in copies/moves for inserting
rvalues is a regression, and should be avoided.



Re: [PATCH] rtl-optimization/71709, strcpy arg optimised out

2016-07-01 Thread Bernd Schmidt

On 07/01/2016 09:19 AM, Alan Modra wrote:

PR rtl-optimization/71709
* ira-lives.c (find_call_crossed_cheap_reg): Exit loop on arg reg
being set, not referenced.


Looks OK.


Bernd


[PATCH][expr.c] PR middle-end/71700: zero-extend sub-word value when widening constructor element

2016-07-01 Thread Kyrill Tkachov

Hi all,

In this arm wrong-code PR the struct assignment goes wrong when expanding 
constructor elements to a register destination
when the constructor elements are signed bitfields less than a word wide.
In this testcase we're intialising a struct with a 16-bit signed bitfield to -1 
followed by a 1-bit bitfield to 0.
Before it starts storing the elements it zeroes out the register.
 The code in store_constructor extends the first field to word size because it 
appears at the beginning of a word.
It sign-extends the -1 to word size. However, when it later tries to store the 
0 to bitposition 16 it has some logic
to avoid redundant zeroing since the destination was originally cleared, so it 
doesn't emit the zero store.
But the previous sign-extended -1 took up the whole word, so the position of 
the second bitfield contains a set bit.

This patch fixes the problem by zeroing out the bits of the widened field that 
did not appear in the original value,
so that we can safely avoid storing the second zero in the constructor.

With this patch the testcase passes at all optimisation levels (it only passed 
at -O0 before).

The initialisation of the struct to {-1, 0} now emits the RTL:
(insn 5 2 6 2 (set (reg/v:SI 112 [ e ])
(const_int 0 [0]))
 (nil))
(insn 6 5 7 2 (set (reg/v:SI 112 [ e ])
(const_int 65535 [0x]))
 (nil))

whereas before it generated:
(insn 5 4 6 (set (reg/v:SI 112 [ e ])
(const_int 0 [0]))
 (nil))

(insn 6 5 7 (set (reg/v:SI 112 [ e ])
(const_int -1 [0x]))
 (nil))


Bootstrapped and tested on arm, aarch64, x86_64 though the codepath is gated on 
WORD_REGISTER_OPERATIONS I didn't
expect any effect on aarch64 and x86_64 anyway.

Ok for trunk?

This bug appears on all versions from 4.9 onwards so I'll be testing it on the 
branches as well.

Thanks,
Kyrill

2016-07-01  Kyrylo Tkachov  

PR middle-end/71700
* expr.c (store_constructor): Mask sign-extended bits when widening
sub-word constructor element at the start of a word.

2016-07-01  Kyrylo Tkachov  

PR middle-end/71700
* gcc.c-torture/execute/pr71700.c: New test.
diff --git a/gcc/expr.c b/gcc/expr.c
index 9733401e09052457678a6a8817fe16f8a737927e..abb4416e41b60ab69f6f9d844e3a40853b0d1cde 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -6281,6 +6281,13 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
 		type = lang_hooks.types.type_for_mode
 		  (word_mode, TYPE_UNSIGNED (type));
 		value = fold_convert (type, value);
+		/* Make sure the bits beyond the original bitsize are zero
+		   so that we can correctly avoid extra zeroing stores in
+		   later constructor elements.  */
+		tree bitsize_mask
+		  = wide_int_to_tree (type, wi::mask (bitsize, false,
+			   BITS_PER_WORD));
+		value = fold_build2 (BIT_AND_EXPR, type, value, bitsize_mask);
 		  }
 
 		if (BYTES_BIG_ENDIAN)
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr71700.c b/gcc/testsuite/gcc.c-torture/execute/pr71700.c
new file mode 100644
index ..80afd3809c3abd24135fb36b757ace9f41ba3112
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr71700.c
@@ -0,0 +1,19 @@
+struct S
+{
+  signed f0 : 16;
+  unsigned f1 : 1;
+};
+
+int b;
+static struct S c[] = {{-1, 0}, {-1, 0}};
+struct S d;
+
+int
+main ()
+{
+  struct S e = c[0];
+  d = e;
+  if (d.f1 != 0)
+__builtin_abort ();
+  return 0;
+}


Re: [PATCH][ARM] Delete thumb_reload_in_h

2016-07-01 Thread Richard Earnshaw (lists)
On 10/06/16 15:55, Kyrill Tkachov wrote:
> Hi all,
> 
> This function just ICEs and isn't actually called from anywhere.
> It was introduced back in 2000 as part of a large merge introducing
> Thumb support
> and was aborting even then. I don't think having it around is of any
> benefit.
> 
> Tested on arm-none-eabi.
> 
> Ok for trunk?
> 

OK.  If it's never used, then deleting useless dead code like this is
obvious IMO.

R.

> Thanks,
> Kyrill
> 
> 2016-06-10  Kyrylo Tkachov  
> 
> * config/arm/arm.c (thumb_reload_in_hi): Delete.
> * config/arm/arm-protos.h (thumb_reload_in_hi): Delete prototype.
> 
> thumb_reload_in_hi.patch
> 
> 
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 
> b6981bcfb9070787cb7472b00c24e0f86a2cae1f..888aaf944a7974d61d96f356a94acefa8e9a2f21
>  100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -193,7 +193,6 @@ extern const char *thumb_call_via_reg (rtx);
>  extern void thumb_expand_movmemqi (rtx *);
>  extern rtx arm_return_addr (int, rtx);
>  extern void thumb_reload_out_hi (rtx *);
> -extern void thumb_reload_in_hi (rtx *);
>  extern void thumb_set_return_address (rtx, rtx);
>  extern const char *thumb1_output_casesi (rtx *);
>  extern const char *thumb2_output_casesi (rtx *);
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 
> 70e52546bc45c404788713c535e116e591ec5d4e..1618a1eaaa3a9902f45a15f3de5b524a4dfbebb8
>  100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -25670,13 +25670,6 @@ thumb_reload_out_hi (rtx *operands)
>emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], 
> operands[2]));
>  }
>  
> -/* Handle reading a half-word from memory during reload.  */
> -void
> -thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
> -{
> -  gcc_unreachable ();
> -}
> -
>  /* Return the length of a function name prefix
>  that starts with the character 'c'.  */
>  static int
> 



Re: [PATCH] ix86: fix PR/65105 testcase 2

2016-07-01 Thread Uros Bizjak
On Fri, Jul 1, 2016 at 10:19 AM, Jan Beulich  wrote:
> I cannot see how without allowing the compiler to use SSE2 instructions
> (as is done by all other tests for this PR scanning for particular
> instructions) this test could ever have succeeded anywhere.

Well, most of 32bit target testing nowadays is done on x86_64
multilibs, where SSE2 is also enabled by default on 32bit.

> gcc/testsuite/
> 2016-07-01  Jan Beulich  
>
> * gcc.target/i386/pr65105-2.c: Add -msse2.

OK everywhere.

Thanks,
Uros.

> --- 2016-06-30/gcc/testsuite/gcc.target/i386/pr65105-2.c
> +++ 2016-06-30/gcc/testsuite/gcc.target/i386/pr65105-2.c
> @@ -1,6 +1,6 @@
>  /* PR target/pr65105 */
>  /* { dg-do compile { target { ia32 } } } */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -msse2" } */
>  /* { dg-final { scan-assembler "por" } } */
>
>  long long i1, i2, res;
>
>
>


Re: Problem in cxx_fundamental_alignment_p?

2016-07-01 Thread Dodji Seketeli
Hello Bernd,

Bernd Schmidt  writes:

> I came across what I think is a bug in cxx_fundamental_alignment_p.
>
> User alignments are specified in units of bytes. This is documented,
> and we can also see the following in handle_aligned_attribute, for the
> case when we have no args:
>   align_expr = size_int (ATTRIBUTE_ALIGNED_VALUE / BITS_PER_UNIT);
> Then, we compute the log of that alignment in check_user_alignment:
>   i = check_user_alignment (align_expr, true)
> That's the log of the alignment in bytes, as we can see a little
> further down:
>   SET_TYPE_ALIGN (*type, (1U << i) * BITS_PER_UNIT);
>
> Then, we call check_cxx_fundamental_alignment_constraints, which
> recomputes the alignment (in bytes) from that log:
>   unsigned requested_alignment = 1U << align_log;
> It then calls cxx_fundamental_alignment_p, where we compare it to
> TYPE_ALIGN values, which are specified in bits. So I think we have a
> mismatch there.
>
> Does that sound right?

Yes, I think you are right on all account.

> The patch below was bootstrapped and tested on x86_64-linux, without
> issues,

The patch looks good to me, thanks.

> but I'm not convinced this code is covered by any testcase.

Hmhm, looking at the test cases, accompanying the initial patch that
introduced that code, I agree.  I guess we should probably add a test case that
uses [[gnu::aligned (val)]], with 'val' being an alignment that is
greater than MAX (TYPE_ALIGN_UNIT (long_long_integer_type_node),
  TYPE_ALIGN_UNIT (long_double_type_node))
in the g++.dg/cpp0x/gen-attrs-*.C series of tests?


-- 
Dodji


[PATCH] fix interaction of -S and -x {c,c++}-header

2016-07-01 Thread Jan Beulich
Irrespective of the use of -o this so far resulted in "error: output
filename specified twice", since cc1_options already produces a -o
option when -S was specified.

gcc/
2016-07-01  Jan Beulich  

* varasm.c (get_variable_section): Validate initializer in
named .bss-like sections.

gcc/testsuite/
2016-07-01  Jan Beulich  

* gcc.dg/bss.c: New.

--- 2016-06-30/gcc/cp/lang-specs.h
+++ 2016-06-30/gcc/cp/lang-specs.h
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3.
   cc1plus %{save-temps*|no-integrated-cpp:-fpreprocessed 
%{save-temps*:%b.ii} %{!save-temps*:%g.ii}}\
  %{!save-temps*:%{!no-integrated-cpp:%(cpp_unique_options)}}\
%(cc1_options) %2\
-   %{!fsyntax-only:-o %g.s \
+   %{!fsyntax-only:%{!S:-o %g.s} \
%{!fdump-ada-spec*:%{!o*:--output-pch=%i.gch}\
   %W{o*:--output-pch=%*}}%V",
  CPLUSPLUS_CPP_SPEC, 0, 0},
--- 2016-06-30/gcc/gcc.c
+++ 2016-06-30/gcc/gcc.c
@@ -1330,7 +1330,7 @@ static const struct compiler default_com
   %W{o*:--output-pch=%*}}%V}}\
  %{!save-temps*:%{!traditional-cpp:%{!no-integrated-cpp:\
cc1 %(cpp_unique_options) %(cc1_options)\
-   %{!fsyntax-only:-o %g.s \
+   %{!fsyntax-only:%{!S:-o %g.s} \
%{!fdump-ada-spec*:%{!o*:--output-pch=%i.gch}\
   %W{o*:--output-pch=%*}}%V}}}", 
0, 0, 0},
   {".i", "@cpp-output", 0, 0, 0},
--- 2016-06-30/gcc/testsuite/g++.dg/header.C
+++ 2016-06-30/gcc/testsuite/g++.dg/header.C
@@ -0,0 +1,9 @@
+/* This really should use "dg-do compile" without the -S in dg-options,
+   but the extra options get put after the input file in that case, and
+   hence the test would fail. */
+/* { dg-do assemble } */
+/* { dg-options "-S -x c++-header" } */
+
+struct s {
+   unsigned field;
+};
--- 2016-06-30/gcc/testsuite/gcc.dg/header.c
+++ 2016-06-30/gcc/testsuite/gcc.dg/header.c
@@ -0,0 +1,9 @@
+/* This really should use "dg-do compile" without the -S in dg-options,
+   but the extra options get put after the input file in that case, and
+   hence the test would fail. */
+/* { dg-do assemble } */
+/* { dg-options "-S -x c-header" } */
+
+struct s {
+   unsigned field;
+};





[PATCH v2] check initializer to be zero in .bss-like sections

2016-07-01 Thread Jan Beulich
Just like gas, which has recently learned to reject such initializers,
gcc shouldn't accept such either.
---
v2: Use dg-require-named-sections.

gcc/
2016-07-01  Jan Beulich  

* varasm.c (get_variable_section): Validate initializer in
named .bss-like sections.

gcc/testsuite/
2016-07-01  Jan Beulich  

* gcc.dg/bss.c: New.

--- 2016-06-30/gcc/testsuite/gcc.dg/bss.c
+++ 2016-06-30/gcc/testsuite/gcc.dg/bss.c
@@ -0,0 +1,9 @@
+/* Test non-zero initializers in .bss-like sections get properly refused.  */
+/* { dg-do compile } */
+/* { dg-require-named-sections "" } */
+/* { dg-options "" } */
+
+int __attribute__((section(".bss.local"))) x = 1; /* { dg-error "" "zero init" 
} */
+int *__attribute__((section(".bss.local"))) px =  /* { dg-error "" "zero 
init" } */
+int __attribute__((section(".bss.local"))) y = 0;
+int *__attribute__((section(".bss.local"))) py = (void*)0;
--- 2016-06-30/gcc/varasm.c
+++ 2016-06-30/gcc/varasm.c
@@ -1150,7 +1150,18 @@ get_variable_section (tree decl, bool pr
 
   resolve_unique_section (decl, reloc, flag_data_sections);
   if (IN_NAMED_SECTION (decl))
-return get_named_section (decl, NULL, reloc);
+{
+  section *sect = get_named_section (decl, NULL, reloc);
+
+  if ((sect->common.flags & SECTION_BSS) && !bss_initializer_p (decl))
+   {
+ error_at (DECL_SOURCE_LOCATION (decl),
+   "only zero initializers are allowed in section %qs",
+   sect->named.name);
+ DECL_INITIAL (decl) = error_mark_node;
+   }
+  return sect;
+}
 
   if (ADDR_SPACE_GENERIC_P (as)
   && !DECL_THREAD_LOCAL_P (decl)





[PATCH] ix86: fix PR/65105 testcase 2

2016-07-01 Thread Jan Beulich
I cannot see how without allowing the compiler to use SSE2 instructions
(as is done by all other tests for this PR scanning for particular
instructions) this test could ever have succeeded anywhere.

gcc/testsuite/
2016-07-01  Jan Beulich  

* gcc.target/i386/pr65105-2.c: Add -msse2.

--- 2016-06-30/gcc/testsuite/gcc.target/i386/pr65105-2.c
+++ 2016-06-30/gcc/testsuite/gcc.target/i386/pr65105-2.c
@@ -1,6 +1,6 @@
 /* PR target/pr65105 */
 /* { dg-do compile { target { ia32 } } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -msse2" } */
 /* { dg-final { scan-assembler "por" } } */
 
 long long i1, i2, res;





Re: [PATCH PR c/71699] Handle pointer arithmetic in nonzero tree checks

2016-07-01 Thread Richard Biener
On Thu, Jun 30, 2016 at 5:14 PM, Marc Glisse  wrote:
> On Thu, 30 Jun 2016, Richard Biener wrote:
>
>> points-to analysis already has the constraint that POINTER_PLUS_EXPR
>> cannot leave the object op0 points to.  Of course currently nothing uses the
>> fact whether points-to computes pointed-to as nothing (aka NULL) - so the
>> argument may be moot.
>>
>> Anyway, one of my points to the original patch was that POINTER_PLUS_EXPR
>> handling should be clearly separate from PLUS_EXPR and that we have
>> flag_delete_null_pointer_checks to allow targest to declare that 0 is a
>> valid
>> object pointer (and thus you can do 4 + -4 and reach NULL).
>
>
> Thanks. So the tricky point is that we are not allowed to transform g into f
> below:
>
> char*f(char*p){return p+4;}
> char*g(char*p){return (char*)((intptr_t)p+4);}
>
> That makes sense and seems much easier to guarantee than I feared, nice.
>
> (on the other hand, only RTL is able to simplify (long)p+4-(long)(p+4))

Hmm, yeah - we have some match.pd stuff to handle these kind of cases,
like p + ((long)p2 - (long)p1)) and also (long)(p + x) - (long)p.

OTOH to handle (long)p + 4 - (long)(p + 4) the only thing we need is to
transform (long)(p + 4) to (long)p + 4 ... that would simplify things but
of course we cannot ever undo that canonicalization if the result is
ever converted back to a pointer.  But maybe we can value-number it
the same with some tricks... (might be worth to file a bugreport)

Richard.

> --
> Marc Glisse


Re: [PATCH PR70729] The second part of patch.

2016-07-01 Thread Richard Biener
On Thu, Jun 30, 2016 at 4:51 PM, Yuri Rumyantsev  wrote:
> Richard,
>
> Could you please review additional simple fix for 70729 - we need to
> nullify safelen field of loops containing simduid intrinsics like
> GOMP_SIMD_LANE (introduced  e.g. for private variables). I checked
> that this fix cures regression which was missed by me since AVX2
> machine is required for  libgomp.fortran/examples-4/simd-2.f90.
>
> Regression testing and bootstrapping did not show any new failures.
> Is it OK for trunk?

Ok.

RIchard.

> Patch:
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 2669813..9fbd183 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -204,6 +204,10 @@ adjust_simduid_builtins (hash_table *htab)
>gcc_assert (TREE_CODE (arg) == SSA_NAME);
>simduid_to_vf *p = NULL, data;
>data.simduid = DECL_UID (SSA_NAME_VAR (arg));
> +  /* Need to nullify loop safelen field since it's value is not
> + valid after transformation.  */
> +  if (bb->loop_father && bb->loop_father->safelen > 0)
> +bb->loop_father->safelen = 0;
>if (htab)
>  {
>p = htab->find ();
>
> ChangeLog:
> 2016-06-30  Yuri Rumyantsev  
>
> PR tree-optimization/70729
> * tree-vectorizer.c (adjust_simduid_builtins): Nullify safelen field
> of loop since it can be not valid after transformation.
>
> 2016-06-30 17:28 GMT+03:00 Jakub Jelinek :
>> On Thu, Jun 30, 2016 at 04:25:16PM +0200, Thomas Schwinge wrote:
>>> Hi!
>>>
>>> On Thu, 30 Jun 2016 16:48:25 +0300, Yuri Rumyantsev  
>>> wrote:
>>> > Thanks for your help.
>>> > Could you please look at the following simple patch which cures
>>> > regression - we need to nullify loop safelen  field in
>>> > adjust_simduid_builtins:
>>> >
>>> > diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
>>> > index 2669813..f70380c 100644
>>> > --- a/gcc/tree-vectorizer.c
>>> > +++ b/gcc/tree-vectorizer.c
>>> > @@ -204,6 +204,10 @@ adjust_simduid_builtins (hash_table 
>>> > *htab)
>>> >   gcc_assert (TREE_CODE (arg) == SSA_NAME);
>>> >   simduid_to_vf *p = NULL, data;
>>> >   data.simduid = DECL_UID (SSA_NAME_VAR (arg));
>>> > + /* Need to nullify safelen fielf of loop since it's vale is not
>>> > +valid after transformation.  */
>>
>> s/fielf/field/
>> s/vale/value/
>>
>>> > + if (bb->loop_father && bb->loop_father->safelen > 0)
>>> > +   bb->loop_father->safelen = 0;
>>> >   if (htab)
>>> > {
>>> >   p = htab->find ();
>>
>> Jakub


Re: [PATCH][RFC] Fix PR71632, remove parts of TER

2016-07-01 Thread Richard Biener
On Fri, 1 Jul 2016, Jakub Jelinek wrote:

> On Thu, Jun 30, 2016 at 03:51:20PM +0200, Richard Biener wrote:
> > The following patch fixes PR71632 by removing delayed expansion of
> > TERed defs.  Instead it adds code to apply the scheduling effect
> > to the GIMPLE IL (so you also get better interleaved GIMPLE stmt
> > / generated RTL dumps in .expand).
> 
> Does anything from TER survive after this patch?
> I thought the whole point was that the expansion can see through
> the SSA_NAMEs and optimize based on that, by not seeing through
> them it doesn't, or if it somewhere still uses get_gimple_for_ssa_name,
> if the definition will be already expanded, it might expand stuff multiple
> times.

Yes, get_gimple_for_ssa_name is what survives (also the scheduling
effect though that is applied on GIMPLE now).  And yes, I noted the
issue of multiple expansions with get_gimple_for_ssa_name in 2)
(and that this is probably not worse than multiple expansion through
lazy expansion that ultimatively fails).

And yes, it no longer sees through SSA names for the cases that
the lazy SSA name expansion returned sth !REG_P.

Given the testresults show some regressions plus patched cc1 is
0.2% larger the patch obviously isn't ready yet.

Still I believe it is ultimatively the way to go as in theory 
fwprop/combine should have everything at hand to recover the original 
expansion from the single-use reg defs.

gcc.target/i386/xorps-sse2.c for example shows a missing transform
on GIMPLE given the comment on the testcase
/* Test that we generate xorps when the result is used in FP math.  */
is not reflected by what we do at expansion which decides locally
for the vector int f_int ^ g operation to rewrite it as float
operation (and subregs the result into vector int).  So it would
do that even if the result is used in an integer operation:

vector int x(vector float f, vector int h)
{
  vector int g = { 0x8000, 0, 0x8000, 0 };
  vector int f_int = (vector int) f;
  return (f_int ^ g) + h;
}

turns into

x:
.LFB1:
.cfi_startproc
xorps   .LC0(%rip), %xmm0
paddd   %xmm1, %xmm0
ret

but with the testcases logic it should have better used pxor.

Richard.



Re: [PATCH][RFC] Fix PR71632, remove parts of TER

2016-07-01 Thread Richard Biener
On Thu, 30 Jun 2016, Richard Biener wrote:

> 
> The following patch fixes PR71632 by removing delayed expansion of
> TERed defs.  Instead it adds code to apply the scheduling effect
> to the GIMPLE IL (so you also get better interleaved GIMPLE stmt
> / generated RTL dumps in .expand).
> 
> This removes the quadratic re-expansion of TERed RHS as seen in
> the testcase.
> 
> It affects initial RTL generation in following ways:
> 
>  1) it may expand stmts with unused LHS
>  2) it may expand dead code when use stmts are expanded and those
> use get_gimple_for_ssa, expanding its operands (again).
>  3) it no longer automatically tries to combine with defs doing
> memory loads (in case we TERed the load and expansion of the
> RHS resulted in a MEM).
>  4) the depth-first, left-to-right "expansion" of ops might not
> be a 100% match of what expand currently does
> 
> I expect that 1) and 2) are mostly a non-issue (and not worse than
> the effect seen in the PR).  I also expect that 3) is "quickly"
> recovered by fwprop or combine.
> 
> While I'm quite sure 3) exists I'm not 100% sure we never return
> a non-constant/reg for SSA name expansion (considering expand
> modifiers like EXPAND_SUM).
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> 
> I think this is also an important step towards eventually doing
> a meaningful GIMPLE-level scheduling (for register pressure,
> also to remove this awkward code-placement code from reassoc
> for example).  Without this TER would disrupt the result quite
> a bit (so the scheduling part of TER would be disabled when scheduling
> on GIMPLE).
> 
> Comments?  I'll throw this on one of our spec testers tonight any
> testing on non-x86 archs appreciated.

So testing works ok apart from a few fails:

x86_64/-m64

FAIL: gcc.target/i386/pr70155-22.c scan-assembler-not *movdi_internal
FAIL: gcc.target/i386/xorps-sse2.c scan-assembler xorps[ \\t]
FAIL: gcc.target/i386/xorps-sse2.c scan-assembler-not pxor

x86_64/-m32

XPASS: gcc.target/i386/addr-sel-1.c scan-assembler b+1
FAIL: gcc.target/i386/movbe-2.c scan-assembler-times movbe[ \\t] 4
FAIL: gcc.target/i386/movq-2.c scan-assembler movzbl[ \\t]*123
FAIL: gcc.target/i386/pr71245-1.c scan-assembler-not (fistp|fild)
FAIL: gcc.target/i386/pr71245-2.c scan-assembler-not movlps
FAIL: gcc.target/i386/xorps-sse2.c scan-assembler xorps[ \\t]
FAIL: gcc.target/i386/xorps-sse2.c scan-assembler-not pxor

I so far looked at gcc.target/i386/pr70155-22.c and without the patch
RTL expansion creates

(insn 7 6 0 2 (parallel [
(set (mem/c:TI (symbol_ref:DI ("c") [flags 0x40]  ) [1 c+0 S16 A128])
(plus:TI (mem/c:TI (symbol_ref:DI ("c") [flags 0x40]  
) [1 c+0 S16 A128])
(const_int 1 [0x1])))
(clobber (reg:CC 17 flags))
]) 
/space/rguenther/src/svn/trunk/gcc/testsuite/gcc.target/i386/pr70155-22.c:10 
210 {*addti3_doubleword}
 (nil))

directly while with the patch we get the load and store separated from the
add but then at combine-time:

Trying 7, 8 -> 9:
Successfully matched this instruction:
(set (mem/c:TI (symbol_ref:DI ("c") [flags 0x40]  ) [1 c+0 S16 A128])
(plus:TI (mem/c:TI (symbol_ref:DI ("c") [flags 0x40]  ) [1 c+0 S16 A128])
(const_int 1 [0x1])))
rejecting combination of insns 7, 8 and 9
original costs 8 + 8 + 4 = 20
replacement cost 24

so for some reason RTX costs in the backend reject this variant.  Still
the testcase wants it to happen.

I suspect the other testcases run into similar issues.  Like
gcc.target/i386/xorps-sse2.c somehow magically wanting the
vector integer xor operation in the GIMPLE IL be rewritten to vector float
which probably happens through some simplify-rtx tricks at RTL
expansion time (not sure if it is a good idea to do that, the
vector int constant could be a NaN/denormal when loaded as vector float
and thus trigger a similar issue as the testcase wants to avoid,
re-interpret of int vector as float causing slowdowns).

Richard.

> Thanks,
> Richard.
> 
> PS: avoid_deep_ter_for_debug could go away as well I think.
> 
> 2016-06-30  Richard Biener  
> 
>   PR middle-end/71632
>   * expr.c (expand_expr_real_1): Do not expand TERed SSA name
>   def rhs.
>   * cfgexpand.c (expand_gimple_basic_block): Expand defs of
>   TERed SSA names.  Move debug stmt use handling ...
>   * tree-outof-ssa.c (move_uses_r): ... here.  New helper
>   for ...
>   (rewrite_trees): ... code to apply the scheduling effect of TER
>   to the GIMPLE IL.
>   (remove_ssa_form): Move rewrite_trees call.
> 
>   * gcc.dg/torture/pr71632.c: New testcase.
> 
> Index: gcc/expr.c
> ===
> *** gcc/expr.c(revision 237873)
> --- gcc/expr.c(working copy)
> *** expand_expr_real_1 (tree exp, rtx target
> *** 9695,9704 
> LAST_VIRTUAL_REGISTER + 1);
>

[ARM] Minor fix in arm_function_ok_for_sibcall

2016-07-01 Thread Eric Botcazou
Several tests in the function were updated with a guard for a NULL decl but 
not the VxWorks-specific one, which results in a segfault building libgcc.

Tested on ARM/VxWorks, applied on mainline, 6 and 5 branches as obvious.


2016-07-01  Eric Botcazou  

* config/arm/arm.c (arm_function_ok_for_sibcall): Add another check
for NULL decl.

-- 
Eric BotcazouIndex: config/arm/arm.c
===
--- config/arm/arm.c	(revision 237902)
+++ config/arm/arm.c	(working copy)
@@ -6756,7 +6756,7 @@ arm_function_ok_for_sibcall (tree decl,
 
   /* The PIC register is live on entry to VxWorks PLT entries, so we
  must make the call before restoring the PIC register.  */
-  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
+  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
 return false;
 
   /* If we are interworking and the function is not declared static


Re: [PATCH][RFC] Fix PR71632, remove parts of TER

2016-07-01 Thread Jakub Jelinek
On Thu, Jun 30, 2016 at 03:51:20PM +0200, Richard Biener wrote:
> The following patch fixes PR71632 by removing delayed expansion of
> TERed defs.  Instead it adds code to apply the scheduling effect
> to the GIMPLE IL (so you also get better interleaved GIMPLE stmt
> / generated RTL dumps in .expand).

Does anything from TER survive after this patch?
I thought the whole point was that the expansion can see through
the SSA_NAMEs and optimize based on that, by not seeing through
them it doesn't, or if it somewhere still uses get_gimple_for_ssa_name,
if the definition will be already expanded, it might expand stuff multiple
times.

Jakub


[PATCH] rtl-optimization/71709, strcpy arg optimised out

2016-07-01 Thread Alan Modra
This patch fixes a thinko in find_call_crossed_cheap_reg.

For functions that return an argument unchanged, like strcat,
find_call_crossed_cheap_reg attempts to find an assignment between
a pseudo reg and the arg reg before the call, so that uses of the
pseudo after the call can instead use the return value.  The exit
condition on the loop looking at previous insns was wrong.  Uses of
the arg reg don't matter.  What matters is the insn setting the arg
reg as any assignment involving the arg reg prior to that insn is
likely a completely unrelated use of the hard reg.

Bootstrapped and regression tested powerpc64le-linux and x86_64-linux.
OK to apply?

PR rtl-optimization/71709
* ira-lives.c (find_call_crossed_cheap_reg): Exit loop on arg reg
being set, not referenced.

diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c
index 6950ffb..6b7ee81 100644
--- a/gcc/ira-lives.c
+++ b/gcc/ira-lives.c
@@ -1014,7 +1014,7 @@ find_call_crossed_cheap_reg (rtx_insn *insn)
  break;
}
 
- if (reg_overlap_mentioned_p (reg, PATTERN (prev)))
+ if (reg_set_p (reg, prev))
break;
}
  prev = PREV_INSN (prev);

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] Handle undefined extern vars in output_in_order

2016-07-01 Thread Alexander Monakov
On Thu, 23 Jun 2016, Alexander Monakov wrote:
> Hi,
> 
> I've discovered that this assert in my patch was too restrictive:
> 
> +  if (DECL_HAS_VALUE_EXPR_P (pv->decl))
> + {
> +   gcc_checking_assert (lookup_attribute ("omp declare target link",
> +  DECL_ATTRIBUTES (pv->decl)));
> 
> Testing for the nvptx target uncovered that there's another case where a
> global variable would have a value expr: emutls.  Sorry for not spotting it
> earlier (but at least the new assert did its job).  I think we should always
> skip here over decls that have value-exprs, just like hard-reg vars are
> skipped.  The following patch does that.  Is this still OK?

Ping.

> (bootstrapped/regtested on x86-64)
> 
> Alexander
> 
>   * cgraphunit.c (cgraph_order_sort_kind): New entry ORDER_VAR_UNDEF.
>   (output_in_order): Loop over undefined variables too.  Output them
>   via assemble_undefined_decl.  Skip variables that correspond to hard
>   registers or have value-exprs.
>   * varpool.c (symbol_table::output_variables): Handle undefined
>   variables together with defined ones.
>  
> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index 4bfcad7..e30fe6e 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -2141,6 +2141,7 @@ enum cgraph_order_sort_kind
>ORDER_UNDEFINED = 0,
>ORDER_FUNCTION,
>ORDER_VAR,
> +  ORDER_VAR_UNDEF,
>ORDER_ASM
>  };
>  
> @@ -2187,16 +2188,20 @@ output_in_order (bool no_reorder)
> }
>  }
>  
> -  FOR_EACH_DEFINED_VARIABLE (pv)
> -if (!DECL_EXTERNAL (pv->decl))
> -  {
> -   if (no_reorder && !pv->no_reorder)
> -   continue;
> -   i = pv->order;
> -   gcc_assert (nodes[i].kind == ORDER_UNDEFINED);
> -   nodes[i].kind = ORDER_VAR;
> -   nodes[i].u.v = pv;
> -  }
> +  /* There is a similar loop in symbol_table::output_variables.
> + Please keep them in sync.  */
> +  FOR_EACH_VARIABLE (pv)
> +{
> +  if (no_reorder && !pv->no_reorder)
> +   continue;
> +  if (DECL_HARD_REGISTER (pv->decl)
> + || DECL_HAS_VALUE_EXPR_P (pv->decl))
> +   continue;
> +  i = pv->order;
> +  gcc_assert (nodes[i].kind == ORDER_UNDEFINED);
> +  nodes[i].kind = pv->definition ? ORDER_VAR : ORDER_VAR_UNDEF;
> +  nodes[i].u.v = pv;
> +}
>  
>for (pa = symtab->first_asm_symbol (); pa; pa = pa->next)
>  {
> @@ -,16 +2227,13 @@ output_in_order (bool no_reorder)
>   break;
>  
> case ORDER_VAR:
> -#ifdef ACCEL_COMPILER
> - /* Do not assemble "omp declare target link" vars.  */
> - if (DECL_HAS_VALUE_EXPR_P (nodes[i].u.v->decl)
> - && lookup_attribute ("omp declare target link",
> -  DECL_ATTRIBUTES (nodes[i].u.v->decl)))
> -   break;
> -#endif
>   nodes[i].u.v->assemble_decl ();
>   break;
>  
> +   case ORDER_VAR_UNDEF:
> + assemble_undefined_decl (nodes[i].u.v->decl);
> + break;
> +
> case ORDER_ASM:
>   assemble_asm (nodes[i].u.a->asm_str);
>   break;
> diff --git a/gcc/varpool.c b/gcc/varpool.c
> index ab615fa..e5f991e 100644
> --- a/gcc/varpool.c
> +++ b/gcc/varpool.c
> @@ -733,11 +733,6 @@ symbol_table::output_variables (void)
>  
>timevar_push (TV_VAROUT);
>  
> -  FOR_EACH_VARIABLE (node)
> -if (!node->definition
> -   && !DECL_HAS_VALUE_EXPR_P (node->decl)
> -   && !DECL_HARD_REGISTER (node->decl))
> -  assemble_undefined_decl (node->decl);
>FOR_EACH_DEFINED_VARIABLE (node)
>  {
>/* Handled in output_in_order.  */
> @@ -747,20 +742,19 @@ symbol_table::output_variables (void)
>node->finalize_named_section_flags ();
>  }
>  
> -  FOR_EACH_DEFINED_VARIABLE (node)
> +  /* There is a similar loop in output_in_order.  Please keep them in sync.  
> */
> +  FOR_EACH_VARIABLE (node)
>  {
>/* Handled in output_in_order.  */
>if (node->no_reorder)
> continue;
> -#ifdef ACCEL_COMPILER
> -  /* Do not assemble "omp declare target link" vars.  */
> -  if (DECL_HAS_VALUE_EXPR_P (node->decl)
> - && lookup_attribute ("omp declare target link",
> -  DECL_ATTRIBUTES (node->decl)))
> +  if (DECL_HARD_REGISTER (node->decl)
> + || DECL_HAS_VALUE_EXPR_P (node->decl))
> continue;
> -#endif
> -  if (node->assemble_decl ())
> -changed = true;
> +  if (node->definition)
> +   changed |= node->assemble_decl ();
> +  else
> +   assemble_undefined_decl (node->decl);
>  }
>timevar_pop (TV_VAROUT);
>return changed;
> 
>