libgo patch committed: Update to final 1.6 release

2016-02-17 Thread Ian Lance Taylor
The Go 1.6 release went out earlier today.  This patch updates libgo
to the final version of the Go 1.6 libraries.  Bootstrapped and tested
on x86_64-pc-linux-gnu.  Committed to mainline.

Ian


patch.txt.gz
Description: GNU Zip compressed data


RE: RFC: [Patch, PR Bug 60818] - ICE in validate_condition_mode on powerpc*-linux-gnu* ]

2016-02-17 Thread Rohit Arul Raj D
> -----Original Message-----
> From: Alan Modra [mailto:amo...@gmail.com]
> On Wed, Feb 17, 2016 at 06:31:45AM -0600, Segher Boessenkool wrote:
> > > (gdb) p debug_rtx (other_insn)
> > > (insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
> > > (if_then_else:SI (ne (reg:CC 166)
> > > (const_int 0 [0]))
> > > (reg:SI 168)
> > > (reg:SI 167))) test.c:7 317 {isel_unsigned_si}
> > >  (expr_list:REG_DEAD (reg:SI 168)
> > > (expr_list:REG_DEAD (reg:SI 167)
> > > (expr_list:REG_DEAD (reg:CC 166)
> > > (expr_list:REG_EQUAL (gtu:SI (reg:CC 166)
> > > (const_int 0 [0]))
> > > (nil))
> >
> > The REG_EQUAL there is bad already.  Where does that come from?
> 
> Rohit explained that quite well already, I thought.  It's there due to
> combine transforming a GTU to NE in another insn, which means the reg
> mode changes to CCmode via rs6000.h:SELECT_CC_MODE.

Yes, when the reg mode (reg:166) changes while transforming a GTU to NE,
it affects the reg mode of all reg:166 in the corresponding REG_NOTE's too.
This is the actual code which changes the mode:

File: combine.c [function: simplify_set (Line No 6604)]

#ifndef HAVE_cc0
  
if (compare_mode != GET_MODE (dest))
{
  if (can_change_dest_mode (dest, 0, compare_mode))
{
.
  if (regno < FIRST_PSEUDO_REGISTER)
new_dest = gen_rtx_REG (compare_mode, regno);
  else
{
  SUBST_MODE (regno_reg_rtx[regno], compare_mode);  -->(A)
  new_dest = regno_reg_rtx[regno];
}

}
#endif  /* cc0 */
#endif  /* SELECT_CC_MODE */

  /* If the code changed, we have to build a new comparison in
 undobuf.other_insn.  */
  if (new_code != old_code)
{
  int other_changed_previously = other_changed;
  unsigned HOST_WIDE_INT mask;
  rtx old_cc_use = *cc_use;

  SUBST (*cc_use, gen_rtx_fmt_ee (new_code, GET_MODE (*cc_use), 
  dest, const0_rtx));  
-->(B)
  other_changed = 1;



Content of 'other_insn' before executing (A)

(gdb) p debug_rtx (other_insn)
(insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
(if_then_else:SI (gtu (reg:CCUNS 166)
(const_int 0 [0]))
(reg:SI 168)
(reg:SI 167))) test.c:7 317 {isel_unsigned_si}
 (expr_list:REG_DEAD (reg:SI 168)
(expr_list:REG_DEAD (reg:SI 167)
(expr_list:REG_DEAD (reg:CCUNS 166)
(expr_list:REG_EQUAL (gtu:SI (reg:CCUNS 166)
(const_int 0 [0]))
(nil))
$26 = void

Content of 'other_insn' after executing (A)

(gdb) p debug_rtx (other_insn)
(insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
(if_then_else:SI (gtu (reg:CC 166)
(const_int 0 [0]))
(reg:SI 168)
(reg:SI 167))) test.c:7 317 {isel_unsigned_si}
 (expr_list:REG_DEAD (reg:SI 168)
(expr_list:REG_DEAD (reg:SI 167)
(expr_list:REG_DEAD (reg:CC 166)
(expr_list:REG_EQUAL (gtu:SI (reg:CC 166)
(const_int 0 [0]))
(nil))

Content of 'other_insn' after executing (B)

(gdb) p debug_rtx (other_insn)
(insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
(if_then_else:SI (ne (reg:CC 166)
(const_int 0 [0]))
(reg:SI 168)
(reg:SI 167))) test.c:7 317 {isel_unsigned_si}
 (expr_list:REG_DEAD (reg:SI 168)
(expr_list:REG_DEAD (reg:SI 167)
(expr_list:REG_DEAD (reg:CC 166)
(expr_list:REG_EQUAL (gtu:SI (reg:CC 166)
(const_int 0 [0]))
(nil))

Regards,
Rohit


C++ PATCH for c++/68679 (ICE with member template in unnamed struct)

2016-02-17 Thread Jason Merrill

...because apparently someone wants to write such a thing?

Anyway, it's a simple matter of looking through templates when calling 
reset_decl_linkage.


Tested x86_64-pc-linux-gnu, applying to trunk and 5.
commit c96622dda7826a32ccc3caa34d727d8c1e7c3b3f
Author: Jason Merrill 
Date:   Wed Feb 17 16:01:48 2016 -0500

	PR c++/68679
	* decl2.c (reset_type_linkage_2): Look through member templates.

diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c
index 15d7617..73b0d28 100644
--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -2685,14 +2685,22 @@ reset_type_linkage_2 (tree type)
 	  reset_decl_linkage (ti);
 	}
   for (tree m = TYPE_FIELDS (type); m; m = DECL_CHAIN (m))
-	if (VAR_P (m))
-	  reset_decl_linkage (m);
+	{
+	  tree mem = STRIP_TEMPLATE (m);
+	  if (VAR_P (mem))
+	reset_decl_linkage (mem);
+	}
   for (tree m = TYPE_METHODS (type); m; m = DECL_CHAIN (m))
 	{
-	  reset_decl_linkage (m);
-	  if (DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (m))
-	/* Also update its name, for cxx_dwarf_name.  */
-	DECL_NAME (m) = TYPE_IDENTIFIER (type);
+	  tree mem = STRIP_TEMPLATE (m);
+	  reset_decl_linkage (mem);
+	  if (DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (mem))
+	{
+	  /* Also update its name, for cxx_dwarf_name.  */
+	  DECL_NAME (mem) = TYPE_IDENTIFIER (type);
+	  if (m != mem)
+		DECL_NAME (m) = TYPE_IDENTIFIER (type);
+	}
 	}
   binding_table_foreach (CLASSTYPE_NESTED_UTDS (type),
 			 bt_reset_linkage_2, NULL);
diff --git a/gcc/testsuite/g++.dg/other/anon8.C b/gcc/testsuite/g++.dg/other/anon8.C
new file mode 100644
index 000..1fdd4c1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/other/anon8.C
@@ -0,0 +1,9 @@
+// PR c++/68679
+
+typedef struct {
+  struct {
+unsigned d[4];
+template <typename T>
+unsigned operator[] (T i) const { return d[i]; }
+  } c;
+} A;


C++ PATCH for c++/68585 (wrong error with constexpr list-initialization)

2016-02-17 Thread Jason Merrill
init_subob_ctx changed value, and then evaluating value didn't change 
it, so we were thinking nothing had changed.  Fixed by remembering the 
value from before init_subob_ctx.


Tested x86_64-pc-linux-gnu, applying to trunk and 5.
commit afc049e2b450d710e62e3c0112e30a9600d6
Author: Jason Merrill 
Date:   Wed Feb 17 16:20:00 2016 -0500

	PR c++/68585
	* constexpr.c (cxx_eval_bare_aggregate): Fix 'changed' detection.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 11037fb..0eedfca 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -2234,6 +2234,7 @@ cxx_eval_bare_aggregate (const constexpr_ctx *ctx, tree t,
   bool side_effects_p = false;
   FOR_EACH_CONSTRUCTOR_ELT (v, i, index, value)
 {
+  tree orig_value = value;
   constexpr_ctx new_ctx;
   init_subob_ctx (ctx, new_ctx, index, value);
   if (new_ctx.ctor != ctx->ctor)
@@ -2246,7 +2247,7 @@ cxx_eval_bare_aggregate (const constexpr_ctx *ctx, tree t,
   /* Don't VERIFY_CONSTANT here.  */
   if (ctx->quiet && *non_constant_p)
 	break;
-  if (elt != value)
+  if (elt != orig_value)
 	changed = true;
 
   if (!TREE_CONSTANT (elt))
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist9.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist9.C
new file mode 100644
index 000..239b91e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-initlist9.C
@@ -0,0 +1,41 @@
+// PR c++/68585
+// { dg-do compile { target c++11 } }
+
+template
+  struct array
+  {
+T _M_data[N];
+  };
+
+template
+  struct integer_sequence
+  {
+  };
+
+struct Pos
+{
+  unsigned l;
+};
+
+template
+constexpr array make_grid_position(integer_sequence)
+{
+  return {{ Pos{Ints}... }};
+}
+
+constexpr array make_grid_positions()
+{
+  return make_grid_position(integer_sequence{});
+}
+
+template
+void generate_sudoku(T)
+{
+  constexpr auto positions = make_grid_positions(); // fail
+}
+
+int main()
+{
+  constexpr auto positions = make_grid_positions(); // ok
+  generate_sudoku(1);
+}


C++ PATCH for c++/65985 (wrong error with constexpr ctor)

2016-02-17 Thread Jason Merrill
The declaration of __PRETTY_FUNCTION__ was confusing 
build_constexpr_constructor_member_initializers.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit aef1a26de333aab5f602f6eb02cfd1ae751bb84f
Author: Jason Merrill 
Date:   Wed Feb 17 16:33:50 2016 -0500

	PR c++/65985
	* constexpr.c (build_constexpr_constructor_member_initializers):
	Handle an additional STATEMENT_LIST.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 0eedfca..d3b04b1 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -528,21 +528,32 @@ build_constexpr_constructor_member_initializers (tree type, tree body)
 {
   vec *vec = NULL;
   bool ok = true;
-  if (TREE_CODE (body) == MUST_NOT_THROW_EXPR
-  || TREE_CODE (body) == EH_SPEC_BLOCK)
-body = TREE_OPERAND (body, 0);
-  if (TREE_CODE (body) == STATEMENT_LIST)
-{
-  for (tree_stmt_iterator i = tsi_start (body);
-	   !tsi_end_p (i); tsi_next ())
-	{
-	  body = tsi_stmt (i);
-	  if (TREE_CODE (body) == BIND_EXPR)
-	break;
-	}
+  while (true)
+switch (TREE_CODE (body))
+  {
+  case MUST_NOT_THROW_EXPR:
+  case EH_SPEC_BLOCK:
+	body = TREE_OPERAND (body, 0);
+	break;
+
+  case STATEMENT_LIST:
+	for (tree_stmt_iterator i = tsi_start (body);
+	 !tsi_end_p (i); tsi_next ())
+	  {
+	body = tsi_stmt (i);
+	if (TREE_CODE (body) == BIND_EXPR)
+	  break;
+	  }
+	break;
+
+  case BIND_EXPR:
+	body = BIND_EXPR_BODY (body);
+	goto found;
+
+  default:
+	gcc_unreachable ();
 }
-  if (TREE_CODE (body) == BIND_EXPR)
-body = BIND_EXPR_BODY (body);
+ found:
   if (TREE_CODE (body) == CLEANUP_POINT_EXPR)
 {
   body = TREE_OPERAND (body, 0);
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-assert2.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-assert2.C
new file mode 100644
index 000..a329101
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-assert2.C
@@ -0,0 +1,31 @@
+// PR c++/65985
+// { dg-do compile { target c++14 } }
+
+#include <cassert>
+
+class Angle
+{
+  int degrees = 0;
+
+  constexpr auto invariant() const noexcept
+  {
+return 0 <= degrees && degrees < 360;
+  }
+
+public:
+  explicit constexpr Angle(int n) noexcept
+: degrees{n % 360}
+  {
+assert(invariant());
+  }
+
+  /* implicit */ constexpr operator auto() const noexcept
+  {
+return degrees;
+  }
+};
+
+int main()
+{
+  static_assert(Angle{360} == 0, "");
+}


Re: [PATCH] 69780 - [4.9/5/6 Regression] ICE on __builtin_alloca_with_align, with small alignment

2016-02-17 Thread Martin Sebor

On 02/17/2016 03:50 PM, Jakub Jelinek wrote:

On Wed, Feb 17, 2016 at 01:48:26PM -0700, Martin Sebor wrote:

I shifted the alignment so that it could be compared against
MAX_STACK_ALIGNMENT.  But after some searching it seems as though
MAX_STACK_ALIGNMENT is in bits, rather than bytes as I had assumed,
so I've removed the shift.


The reason why MAX_STACK_ALIGNMENT is wrong is that on most targets
it is terribly small number (a couple of bytes usually), only i?86/x86_64 is
an exception, because it is the only target that supports dynamic stack
realignment.


I see. Thank you for the explanation.  I've confirmed it in
an arm-eabi cross compiler where MAX_STACK_ALIGNMENT is 64.

What I still don't understand is why a user-specified alignment
is being tested for inequality to MAX_STACK_ALIGNMENT in
check_cxx_fundamental_alignment_constraints (the code whose
example I followed):

7765 #undef MAX_TARGET_FIELD_ALIGNMENT
7766 /* For stack variables, the target supports at most
7767 MAX_STACK_ALIGNMENT. */
7768 else if (decl_function_context (node) != NULL
7769 && requested_alignment > (max_align = MAX_STACK_ALIGNMENT))
7770 alignment_too_large_p = true;

That would then seem also wrong, although I haven't been able to
trigger that code with a simple test case because the call to
decl_function_context() always returns null, so maybe the code
is never used.


All other targets do support more aligned variables than that, but because
they don't support dynamic stack realignment, they handle those more aligned
automatic variables by doing alloca instead.  Which is exactly why we need
__builtin_alloca_with_align to support those larger alignments.
There is no inherent reason why __builtin_alloca_with_align can't support
arbitrary (power of 2 > BITS_PER_UNIT of course) alignments, as long as
it fits into address space and the alignment doesn't run into other memory,
but that is the general problem of alloca, it is up to the user to ensure
he doesn't run out of the stack, and the alignment is no different.


I introduced the check for the upper bound because larger alignment
values (1L << 32 and greater) also cause an ICE.

Imposing the same constraint as on overaligned stack variables which
are rejected when the alignment is greater than 1 << 28 bytes avoids
the ICE:

  void f (void) {
alignas (1LU << 29) int i;
  }

It looks like I was just unlucky enough to pick the wrong piece
of code to put the constraint in place (i.e.,
check_cxx_fundamental_alignment_constraints).

I've adjusted the patch to use similar logic as in
check_user_alignment().  I've also adjusted the documentation to
mention the upper bound, leaving the exact upper bound unspecified
since I get the impression that it's imposed by the internals of
the implementation and might change in the future.


No, see above.  And, if you want the exact largest possible power of 2
smaller than ~0U, you can use (unsigned int) INTTYPE_MINIMUM (int).


I was looking for the largest possible power of 2 smaller than
~0U but something like this will do.  I believe it evaluates to
the same result as (UINT_MAX/2 + 1) and the latter seems clearer
and safer to me (based on the comments about compiler bugs near
the definition of INTTYPE_MINIMUM and based on the fact that
there are only two uses of the macro in the code base, neither
of which using int as its argument).

Attached is the updated patch.  Hopefully it's acceptable.

Martin
PR middle-end/69780 - [4.9/5/6 Regression] ICE on __builtin_alloca_with_align
	with small alignment
PR c/69759 - __builtin_alloca and __builtin_alloca_with_align undocumented

gcc/c-family/ChangeLog:
2016-02-17  Martin Sebor  

	PR middle-end/69780
	* c-common.c (check_builtin_function_arguments): Validate and reject
	invalid arguments to __builtin_alloca_with_align.

gcc/ChangeLog:
2016-02-17  Martin Sebor  

	PR c/69759
	* doc/extend.texi (Other Builtins): Document __builtin_alloca and
	__builtin_alloca_with_align.

gcc/testsuite/ChangeLog:
2016-02-17  Martin Sebor  

	PR middle-end/69780
	* g++.dg/ext/builtin_alloca.C: New test.
	* gcc.dg/builtins-68.c: New test.

Index: gcc/c-family/c-common.c
===
--- gcc/c-family/c-common.c	(revision 233476)
+++ gcc/c-family/c-common.c	(working copy)
@@ -9818,6 +9818,33 @@ check_builtin_function_arguments (tree f
 
   switch (DECL_FUNCTION_CODE (fndecl))
 {
+case BUILT_IN_ALLOCA_WITH_ALIGN:
+  {
+	/* Get the requested alignment (in bits) if it's a constant
+	   integer expression.  */
+	unsigned HOST_WIDE_INT align = TREE_CODE (args[1]) == INTEGER_CST
+	  && tree_fits_uhwi_p (args[1]) ? tree_to_uhwi (args[1]) : 0;
+
+	/* Determine if the requested alignment is a power of 2.  */
+	if ((align & (align - 1)))
+	  align = 0;
+
+	/* The maximum alignment in bits corresponding to the same
+	   maximum in bytes enforced in check_user_alignment().  */
+	unsigned maxalign = 

[wwwdocs] Changes for LTO and IPA, ver 2

2016-02-17 Thread Jan Hubicka
Hi,
it seems I have updated the patch for comments received but did not send
updated version to the ML. Here it is.

Honza

Index: changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
retrieving revision 1.58
diff -c -3 -p -r1.58 changes.html
*** changes.html15 Feb 2016 11:32:56 -  1.58
--- changes.html18 Feb 2016 04:16:50 -
*** For more information, see the
*** 50,55 
--- 50,124 
of array bounds.  In particular, it enables
-fsanitize=bounds as well as instrumentation of
flexible array member-like arrays.
+ Type-based alias analysis now disambiguates accesses to different
+   pointers. This improves precision of the alias oracle by about 20-30%
+   on higher-level C++ programs. Programs doing invalid type punning
+   of pointer types may now need -fno-strict-aliasing
+   to work correctly.
+ Alias analysis now correctly supports weakref and
+   alias attributes. This makes it possible to access
+   both a variable and its alias in one translation unit which is common
+   with link-time optimization.
+ Value range propagation now assumes that this pointer
+   of C++ member functions is non-NULL.  This eliminates 
+   common NULL pointer checks
+   but also breaks some non-conforming code-bases (such as Qt-5, Chromium,
+   KDevelop). As a temporary work-around
+   -fno-delete-null-pointer-checks can be used. Wrong
+   code can be identified by using -fsanitize=undefined.
+ Link-time optimization improvements:
+ 
+   warning and error attributes are now
+ correctly preserved by declaration linking and thus
+ -D_FORTIFY_SOURCE=2 is now supported with 
-flto.
+   Type merging was fixed to handle C and Fortran interoperability
+ rules as defined by the Fortran 2008 language standard.
+ 
+ As an exception, CHARACTER(KIND=C_CHAR) is not 
inter-operable
+ with char in all cases because it is an array while
+ char is scalar.
+ INTEGER(KIND=C_SIGNED_CHAR) should be used instead.
+ In general, this inter-operability can not be implemented, for
+ example, on targets where function passing conventions of arrays
+ differs from scalars.
+   More type information is now preserved at link time reducing
+ the loss of accuracy of the type based alias analysis compared
+ to builds without link-time optimization.
+   Invalid type punning on global variables and declarations is now
+ reported with -Wodr-type-mismatch.
+   The size of LTO object files was reduced by about 11% (measured
+ by compiling Firefox 46.0).
+   Link-time parallelization (enabled using -flto=n)
+ was significantly improved by decreasing the size of streamed
+ data when partitioning programs.  The size of streamed
+ IL while compiling Firefox 46.0 was reduced by 66%.
+   Linker plugin was extended to pass information about type of
+ binary produced to GCC back-end (that can be also manually controlled
+ by -flinker-output).  This makes it possible to
+ properly configure code generator and support incremental
+ linking. Incremental linking of LTO objects by gcc -r is
+ now supported on plugin-enabled setups.
+ There are two ways to perform incremental linking:
+ 
+Linking by ld -r will result in object file
+with all sections from individual object files mechanically merged.
+This delays the actual linking to the time final binary is produced
+and thus permits whole program optimization.  Linking such object
+file is however slower.
+Linking by gcc -r will lead to link time 
optimization
+and produce final binary into the object file.  Linking such object
+file is fast but avoids any benefits from whole program 
optimization.
+ 
+ GCC 7 will support incremental link-time optimization with gcc 
-r.
+ 
+ Inter-procedural optimization improvements:
+ 
+   Basic jump threading is now performed before profile construction
+ and inline analysis, resulting in more realistic size and time 
estimates
+ that drive the heuristics of the inliner and function cloning 
passes.
+   Function cloning now more aggressively eliminates unused function
+ parameters.
+ 

  
  


Re: [PATCH] Fix PR c++/68948 (wrong code generation due to invalid constructor call)

2016-02-17 Thread Jason Merrill

OK.

Jason


Re: PPC libgcc IEEE128 soft-fp exception/rounding fixes

2016-02-17 Thread Alan Modra
On Wed, Feb 17, 2016 at 05:40:01PM -0600, Paul E. Murphy wrote:
> - FP_INIT_ROUNDMODE writes junk to the fpscr. I assume this should be
>   reading the fpscr and initializing the local rounding mode variable
>   declared via _FP_DECL_EX.

Yeah, looks that way.

> - FP_TRAPPING_EXCEPTIONS evaluates to zero where used. It seems like it
>   should return a bit field of FP_EX_* bits indicating which trap is
>   enabled. Likewise, when these bits are set in the fpscr, the trap is
>   enabled.

Yes, but

> +/* A set bit indicates an exception is trapping.  */
> +# define FP_TRAPPING_EXCEPTIONS ((_fpscr.i << 22) & FP_EX_ALL)

why then a shift here, since FP_EX_* are defined as the actual
register bits?  Oh, I see.  FP_EX_* are the status bits, and you want
the enable bits.  ie. bit 56 rather than bit 34, bit 57 rather than
bit 35 and so on (bits numbered from 0 as msb).  A comment to that
effect might reduce head scratching.

-- 
Alan Modra
Australia Development Lab, IBM


Re: PPC libgcc IEEE128 soft-fp exception/rounding fixes

2016-02-17 Thread Joseph Myers
I have no comments on this patch beyond again suggesting that it would be 
desirable for some future float128 GCC patch to enable as many as possible 
of the x86 float128 tests (that is, tests of float128 anywhere in the GCC 
testsuite that currently are restricted to x86, x86_64 and ia64 targets or 
some subset thereof) also for powerpc (with appropriate dg- directives to 
allow target-specific compilation options to be added, say dg-add-options 
float128).

While some of those tests are inapplicable to powerpc (e.g. 
gcc.dg/torture/float128-extendxf-underflow.c because it uses XFmode, and 
float128-mul-underflow.c, float128-truncdf-underflow.c, 
float128-truncsf-underflow.c because they test for after-rounding tininess 
detection, which is correct for x86 / ia64 but not for powerpc), others 
should apply to powerpc just as to x86 / ia64, and the tests do test 
various aspects of exceptions / rounding modes handling (and other issues, 
e.g. conversions in fp-int-convert-float128*), and it would not surprise 
me if they would have shown up some of the issues fixed by this patch.

-- 
Joseph S. Myers
jos...@codesourcery.com


PPC libgcc IEEE128 soft-fp exception/rounding fixes

2016-02-17 Thread Paul E. Murphy
Hi all,

I am fairly new to IBM and recently appointed maintainer of libdfp,
and work on glibc on ppc. This is my first foray into libgcc.

libdfp implements many common transcendental functions, overrides
type conversions between decimal float and other GCC types with more
optimized variants for dpd encoding.

While investigating some rounding issues with conversions, I tried out
the recent IEEE128 soft-fp support for PPC using a supporting compiler,
while building some of the soft-fp parts locally.

I ran into the following issues which I have attempted to correct with
the attached patch for rs6000/sfp-machine.h:

- FP_INIT_ROUNDMODE writes junk to the fpscr. I assume this should be
  reading the fpscr and initializing the local rounding mode variable
  declared via _FP_DECL_EX.

- FP_TRAPPING_EXCEPTIONS evaluates to zero where used. It seems like it
  should return a bit field of FP_EX_* bits indicating which trap is
  enabled. Likewise, when these bits are set in the fpscr, the trap is
  enabled.


libgcc
* config/rs6000/sfp-machine.h:
(_FP_DECL_EX): Declare _fpscr as union of u64 and double.
(FP_TRAPPING_EXCEPTIONS): Remove this, FP_HANDLE_EXCEPTIONS
will do them implicitly later on.
(FP_INIT_ROUNDMODE): Read the fpscr instead of writing
mystery value.
(FP_ROUNDMODE): Update type of _fpscr.
diff --git a/libgcc/config/rs6000/sfp-machine.h b/libgcc/config/rs6000/sfp-machine.h
index 75d5e1a..4bc0040 100644
--- a/libgcc/config/rs6000/sfp-machine.h
+++ b/libgcc/config/rs6000/sfp-machine.h
@@ -130,9 +130,9 @@ void __sfp_handle_exceptions (int);
 if (__builtin_expect (_fex, 0))		\
   __sfp_handle_exceptions (_fex);		\
   } while (0);
-/* A set bit indicates an exception is masked and a clear bit indicates it is
-   trapping.  */
-# define FP_TRAPPING_EXCEPTIONS (~_fpscr & (FP_EX_ALL >> 22))
+
+/* A set bit indicates an exception is trapping.  */
+# define FP_TRAPPING_EXCEPTIONS ((_fpscr.i << 22) & FP_EX_ALL)
 
 # define FP_RND_NEAREST	0x0
 # define FP_RND_ZERO	0x1
@@ -141,16 +141,16 @@ void __sfp_handle_exceptions (int);
 # define FP_RND_MASK	0x3
 
 # define _FP_DECL_EX \
-  unsigned long long _fpscr __attribute__ ((unused)) = FP_RND_NEAREST
+  union { unsigned long long i; double d; } _fpscr __attribute__ ((unused)) = \
+	 { .i = FP_RND_NEAREST }
 
 #define FP_INIT_ROUNDMODE			\
   do {		\
-__asm__ __volatile__ ("mtfsf 255, %0"	\
-			  :			\
-			  : "f" (_fpscr));	\
+__asm__ __volatile__ ("mffs %0"		\
+			  : "=f" (_fpscr.d));	\
   } while (0)
 
-# define FP_ROUNDMODE	(_fpscr & FP_RND_MASK)
+# define FP_ROUNDMODE	(_fpscr.i & FP_RND_MASK)
 #endif	/* !__FLOAT128__ */
 
 /* Define ALIASNAME as a strong alias for NAME.  */


Re: RFC: [Patch, PR Bug 60818] - ICE in validate_condition_mode on powerpc*-linux-gnu* ]

2016-02-17 Thread Alan Modra
On Wed, Feb 17, 2016 at 06:31:45AM -0600, Segher Boessenkool wrote:
> > Corresponding content of "op" which causes the ICE:
> > gdb) p debug_rtx (op)
> > (gtu:SI (reg:CC 166)  -- (operator and mode doesn't 
> > match)
> > (const_int 0 [0]))
> 
> That is invalid RTL for this target (should be CCUNS).  Invalid RTL
> should not be passed to recog.

Really??  combine does that all the time, when it asks "is this
instruction valid"!

> > (gdb) p debug_rtx (other_insn)
> > (insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
> > (if_then_else:SI (ne (reg:CC 166)
> > (const_int 0 [0]))
> > (reg:SI 168)
> > (reg:SI 167))) test.c:7 317 {isel_unsigned_si}
> >  (expr_list:REG_DEAD (reg:SI 168)
> > (expr_list:REG_DEAD (reg:SI 167)
> > (expr_list:REG_DEAD (reg:CC 166)
> > (expr_list:REG_EQUAL (gtu:SI (reg:CC 166)
> > (const_int 0 [0]))
> > (nil))
> 
> The REG_EQUAL there is bad already.  Where does that come from?

Rohit explained that quite well already, I thought.  It's there due to
combine transforming a GTU to NE in another insn, which means the reg
mode changes to CCmode via rs6000.h:SELECT_CC_MODE.

You might argue that combine shouldn't create such a note, but whether
the note is valid or not depends on the target, doesn't it?  And the
usual way for combine to check validity of rtl is to form up an
instruction and pass that to recog.  Which is exactly what happens
later when combine tries to use the note and runs into the rs6000
backend assert.

It seems quite plain to me that this is primarily an rs6000 backend
problem, solved by the blindingly obvious patch I posted.  Whether you
want to do something in combine as well is a secondary problem.  The
rs6000 backend shouldn't assert on this rtl.

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] 69780 - [4.9/5/6 Regression] ICE on __builtin_alloca_with_align, with small alignment

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 01:48:26PM -0700, Martin Sebor wrote:
> I shifted the alignment so that it could be compared against
> MAX_STACK_ALIGNMENT.  But after some searching it seems as though
> MAX_STACK_ALIGNMENT is in bits, rather than bytes as I had assumed,
> so I've removed the shift.

The reason why MAX_STACK_ALIGNMENT is wrong is that on most targets
it is terribly small number (a couple of bytes usually), only i?86/x86_64 is
an exception, because it is the only target that supports dynamic stack
realignment.
All other targets do support more aligned variables than that, but because
they don't support dynamic stack realignment, they handle those more aligned
automatic variables by doing alloca instead.  Which is exactly why we need
__builtin_alloca_with_align to support those larger alignments.
There is no inherent reason why __builtin_alloca_with_align can't support
arbitrary (power of 2 > BITS_PER_UNIT of course) alignments, as long as
it fits into address space and the alignment doesn't run into other memory,
but that is the general problem of alloca, it is up to the user to ensure
he doesn't run out of the stack, and the alignment is no different.

> It's not obvious to me that this is guaranteed to be correct.
> IMO, even if it happens to be, I find it much clearer to check
> against MAX_STACK_ALIGNMENT (or whatever macro describes the
> limit if not this one).

See above, there is no macro describing such limit, it is solely about
doing pretty much __builtin_alloca (size + alignment - 1);
and realign the pointer.

> That would be incorrect because ~0U is neither a power of 2, nor
> the enforced stack alignment.  Again, using the actual limit encoded
> in MAX_STACK_ALIGNMENT seems correct and IMO results in much clearer
> code.

No, see above.  And, if you want the exact largest possible power of 2
smaller than ~0U, you can use (unsigned int) INTTYPE_MINIMUM (int).

> I don't mind waiting a bit for the documentation review, but I do
> feel it's important to update the documentation at the same time
> as making the change to the interface of the builtin.  The GCC
> Coding Conventions even requires it:
> 
>   Any change to documented behavior (for example, the behavior of
>   a command-line option or a GNU language extension) must include
>   the necessary changes to the manual.

But the builtin (which IMHO really was never meant to be user accessible,
but has been added before we had internal functions) is not documented yet.
So, the documentation can be added before or after that IMHO.

> FWIW, since I haven't noticed a clear preference for either of
> these two styles in the code base I decided to count the number
> of occurrences of each to see if one is prevalent.  Although
> the results are mildly in favor of the style you suggest, they
> clearly indicate the lack of consensus:

First of all, you are also counting the static (typically aggregate)
initializers, which are indeed often written as
static ... var =
{

};
But even then the greps I've done were like 2440 vs. 819, and that also
included the file scope initializers.

Jakub


Re: [PATCH] 69780 - [4.9/5/6 Regression] ICE on __builtin_alloca_with_align, with small alignment

2016-02-17 Thread Martin Sebor

On 02/17/2016 12:39 AM, Jakub Jelinek wrote:

On Tue, Feb 16, 2016 at 06:04:48PM -0700, Martin Sebor wrote:

Formatting.  = needs to be on the next line.


There are literally dozens of examples of this style in this file
alone.  In one of the two instances of this style in this patch,
moving the equals sign to the next line would force me to split
the initializer expression over the next two lines to avoid
exceeding the 80 character per line limit and make the code
harder to read.  I also don't see the style you suggest mentioned
in the GNU coding standard or in the GCC coding conventions.
I would prefer to leave this detail to the discretion of the
author.


Please change this, consistency is very much desirable.  Yes, there are
various formatting inconsistencies (which some people fix up as they touch
the code), but that doesn't mean new inconsistencies should be introduced.


Please see my note in the PS.


+#undef MAX_STACK_ALIGNMENT
+#define MAX_STACK_ALIGNMENT __UINT32_MAX__
+


This is wrong for 2 reasons:


Doh! Sorry about that! This was leftover from an experiment I did
at one point and forgot to take out. (You do not need to explain
why it would be wrong to commit, it's obvious.)


switch (DECL_FUNCTION_CODE (fndecl))
  {
+case BUILT_IN_ALLOCA_WITH_ALIGN:
+  {
+   /* Get the requested alignment (in bits) if it's a constant
+  integer expression.  */
+   unsigned HOST_WIDE_INT align =
+ TREE_CODE (args[1]) == INTEGER_CST ? tree_to_uhwi (args[1]) : 0;


Besides formatting, not all INTEGER_CSTs fit into uhwi.
So, you should use instead
unsigned HOST_WIDE_INT align
  = tree_fits_uhwi_p (args[1]) ? tree_to_uhwi (args[1]) : 0;


Thanks. I haven't been able to cause the original code to misbehave
but I vaguely recall running into a problem (ICE?) with one of my
previous changes where I didn't check tree_fits_uhwi_p() in a compiler
built for a 32-bit host.  If that's possible it would be nice if this
trap could somehow be pointed out at compile time via a warning.




+   /* Determine if the requested alignment is a power of 2 greater
+  than CHAR_BIT.  */
+   if ((align & (align - 1)) == 0)
+ align >>= LOG2_BITS_PER_UNIT;
+   else
+ align = 0;


Ugh, why the shifting?  The alignment is in bits, and the alignment in bits
must be a power of two, and the alignment in bits must fit into host
unsigned int.


I shifted the alignment so that it could be compared against
MAX_STACK_ALIGNMENT.  But after some searching it seems as though
MAX_STACK_ALIGNMENT is in bits, rather than bytes as I had assumed,
so I've removed the shift.

(It would be helpful if the unit of MAX_STACK_ALIGNMENT was mentioned
in a comment in defaults.h where the macro is defined.)

With that cleared up, you seem to be suggesting that the alignment
argument shouldn't be checked against the macro.  I see
MAX_STACK_ALIGNMENT being used as the upper bound on stack alignment
in check_cxx_fundamental_alignment_constraints.   I also see that both
the C and C++ front ends reject stack-based variable declarations
aligned on a stricter boundary, such as:

  void foo (void) {
_Alignas (1 << 29) int i;// alignas in C++
  }

(Although the check for _Alignas and C++ alignas uses
(HOST_BITS_PER_INT - BITS_PER_UNIT_LOG)).

Is there a specific reason why you're advising against using
MAX_STACK_ALIGNMENT here?


Thus, instead of the above do just
if ((align & (align - 1)) != 0)
  align = 0;

/* Reject invalid alignments.  */
if (align < BITS_PER_UNIT || (unsigned int) align != align)
or better
/* Reject invalid alignments.  */
if ((align & (align - 1)) != 0
|| align < BITS_PER_UNIT
|| (unsigned int) align != align)


It's not obvious to me that this is guaranteed to be correct.
IMO, even if it happens to be, I find it much clearer to check
against MAX_STACK_ALIGNMENT (or whatever macro describes the
limit if not this one).


and then
  {
error_at (EXPR_LOC_OR_LOC (args[1], input_location),
  "second argument to function %qE must be a constant "
  "integer power of 2 between %qi and %qu",
  fndecl, BITS_PER_UNIT, ~0U);
Or if you don't like ~0U, you can use INTTYPE_MAXIMUM (unsigned int),
but for unsigned type it will do the same thing.


That would be incorrect because ~0U is neither a power of 2 nor
the enforced stack alignment.  Again, using the actual limit encoded
in MAX_STACK_ALIGNMENT seems correct and IMO results in much clearer
code.


--- gcc/doc/extend.texi (revision 233476)
+++ gcc/doc/extend.texi (working copy)
@@ -10144,6 +10144,8 @@ in the Cilk Plus language manual which c
  @node Other Builtins
  @section Other Built-in Functions Provided by GCC
  @cindex built-in functions
+@findex __builtin_alloca
+@findex __builtin_alloca_with_align


I'd prefer not to mix the documentation patch 

C++ PATCH for c++/69842 (wrong error with generic lambda)

2016-02-17 Thread Jason Merrill
The problem here was that the call from the stub returned by the 
conversion function to the op() was changing an xvalue to an lvalue, 
leading to a parameter of the wrong type in the op().


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 54448906d76dd9df2523e6c3950ed368cb63b4de
Author: Jason Merrill 
Date:   Wed Feb 17 15:20:02 2016 -0500

	PR c++/69842
	* method.c (forward_parm): Split out from...
	(add_one_base_init): ...here.
	* lambda.c (maybe_add_lambda_conv_op): Use it.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3b91089..7800ae8 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6013,6 +6013,7 @@ extern tree make_thunk(tree, bool, tree, tree);
 extern void finish_thunk			(tree);
 extern void use_thunk(tree, bool);
 extern bool trivial_fn_p			(tree);
+extern tree forward_parm			(tree);
 extern bool is_trivially_xible			(enum tree_code, tree, tree);
 extern tree get_defaulted_eh_spec		(tree);
 extern tree unevaluated_noexcept_spec		(void);
diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 93b192c..296c6f7 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -962,7 +962,9 @@ maybe_add_lambda_conv_op (tree type)
 	  }
 	else
 	  {
-		tree a = convert_from_reference (tgt);
+		++processing_template_decl;
+		tree a = forward_parm (tgt);
+		--processing_template_decl;
 		CALL_EXPR_ARG (call, ix) = a;
 		if (decltype_call)
 		  CALL_EXPR_ARG (decltype_call, ix) = copy_node (a);
diff --git a/gcc/cp/method.c b/gcc/cp/method.c
index e358ebd..f455b32 100644
--- a/gcc/cp/method.c
+++ b/gcc/cp/method.c
@@ -474,6 +474,19 @@ trivial_fn_p (tree fn)
   return type_has_trivial_fn (DECL_CONTEXT (fn), special_function_p (fn));
 }
 
+/* PARM is a PARM_DECL for a function which we want to forward to another
+   function without changing its value category, a la std::forward.  */
+
+tree
+forward_parm (tree parm)
+{
+  tree exp = convert_from_reference (parm);
+  if (TREE_CODE (TREE_TYPE (parm)) != REFERENCE_TYPE
+  || TYPE_REF_IS_RVALUE (TREE_TYPE (parm)))
+exp = move (exp);
+  return exp;
+}
+
 /* Subroutine of do_build_copy_constructor: Add a mem-initializer for BINFO
given the parameter or parameters PARM, possibly inherited constructor
base INH, or move flag MOVE_P.  */
@@ -494,10 +507,7 @@ add_one_base_init (tree binfo, tree parm, bool move_p, tree inh,
   init = NULL_TREE;
   for (; parm; parm = DECL_CHAIN (parm))
 	{
-	  tree exp = convert_from_reference (parm);
-	  if (TREE_CODE (TREE_TYPE (parm)) != REFERENCE_TYPE
-	  || TYPE_REF_IS_RVALUE (TREE_TYPE (parm)))
-	exp = move (exp);
+	  tree exp = forward_parm (parm);
 	  *p = build_tree_list (NULL_TREE, exp);
 	  p = &TREE_CHAIN (*p);
 	}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-conv1.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-conv1.C
new file mode 100644
index 000..6569af4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-conv1.C
@@ -0,0 +1,14 @@
+// PR c++/69842
+// { dg-do compile { target c++14 } }
+
+template <class T, class U> struct same;
+template <class T> struct same<T,T> {};
+
+int main()
+{
+  auto g = [](auto && _var) {
+    same<int&&,decltype(_var)>();
+  };
+
+  g(0);
+}


Re: [PATCH] xtensa: fix libgcc build with --text-section-literals

2016-02-17 Thread Max Filippov
On Wed, Feb 17, 2016 at 10:59 PM, augustine.sterl...@gmail.com
 wrote:
> On Tue, Feb 16, 2016 at 4:35 PM, Max Filippov  wrote:
>> Functions __muldf3_aux, __divdf3_aux, __mulsf3_aux and __divsf3_aux
>> don't start with leaf_entry, so they need explicit .literal_position,
>> otherwise libgcc build fails in the presence of --text-section-literals.
>>
>> 2016-02-17  Max Filippov  
>> libgcc/
>> * config/xtensa/ieee754-df.S (__muldf3_aux, __divdf3_aux): Add
>> .literal_position before the function.
>> * config/xtensa/ieee754-sf.S (__mulsf3_aux, __divsf3_aux):
>> Likewise.
>
> This is OK, please submit.

Applied to trunk. Thank you!

-- Max


Re: [PATCH] xtensa: fix libgcc build with --text-section-literals

2016-02-17 Thread augustine.sterl...@gmail.com
On Tue, Feb 16, 2016 at 4:35 PM, Max Filippov  wrote:
> Functions __muldf3_aux, __divdf3_aux, __mulsf3_aux and __divsf3_aux
> don't start with leaf_entry, so they need explicit .literal_position,
> otherwise libgcc build fails in the presence of --text-section-literals.
>
> 2016-02-17  Max Filippov  
> libgcc/
> * config/xtensa/ieee754-df.S (__muldf3_aux, __divdf3_aux): Add
> .literal_position before the function.
> * config/xtensa/ieee754-sf.S (__mulsf3_aux, __divsf3_aux):
> Likewise.

This is OK, please submit.


Re: [PATCH] Fix a C++ -Wnonnull-compare regression (PR c++/69850)

2016-02-17 Thread Jason Merrill

OK.

Jason


[PATCH 9/9] S/390: z13 Add missing commutative operand markers.

2016-02-17 Thread Andreas Krebbel
gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/vector.md: Add missing commutative operand markers
to the patterns which qualify for one.
* config/s390/vx-builtins.md: Likewise.
---
 gcc/config/s390/vector.md  | 44 +-
 gcc/config/s390/vx-builtins.md | 44 +-
 2 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 3101057..cc3287c 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -453,8 +453,8 @@
 ; operation into two DImode ADDs.
 (define_insn "add3"
   [(set (match_operand:VIT   0 "nonimmediate_operand" "=v")
-   (plus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
- (match_operand:VIT 2 "general_operand"  "v")))]
+   (plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "%v")
+ (match_operand:VIT 2 "general_operand"   "v")))]
   "TARGET_VX"
   "va\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
@@ -471,7 +471,7 @@
 ; vmlb, vmlhw, vmlf
 (define_insn "mul3"
   [(set (match_operand:VI_QHS  0 "register_operand" "=v")
-   (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand"  "v")
+   (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "%v")
 (match_operand:VI_QHS 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vml\t%v0,%v1,%v2"
@@ -526,7 +526,7 @@
 
 (define_insn "and3"
   [(set (match_operand:VT 0 "register_operand" "=v")
-   (and:VT (match_operand:VT 1 "register_operand"  "v")
+   (and:VT (match_operand:VT 1 "register_operand" "%v")
(match_operand:VT 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vn\t%v0,%v1,%v2"
@@ -537,7 +537,7 @@
 
 (define_insn "ior3"
   [(set (match_operand:VT 0 "register_operand" "=v")
-   (ior:VT (match_operand:VT 1 "register_operand"  "v")
+   (ior:VT (match_operand:VT 1 "register_operand" "%v")
(match_operand:VT 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vo\t%v0,%v1,%v2"
@@ -548,7 +548,7 @@
 
 (define_insn "xor3"
   [(set (match_operand:VT 0 "register_operand" "=v")
-   (xor:VT (match_operand:VT 1 "register_operand"  "v")
+   (xor:VT (match_operand:VT 1 "register_operand" "%v")
(match_operand:VT 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vx\t%v0,%v1,%v2"
@@ -765,7 +765,7 @@
 ; vmnb, vmnh, vmnf, vmng
 (define_insn "smin3"
   [(set (match_operand:VI  0 "register_operand" "=v")
-   (smin:VI (match_operand:VI 1 "register_operand"  "v")
+   (smin:VI (match_operand:VI 1 "register_operand" "%v")
 (match_operand:VI 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vmn\t%v0,%v1,%v2"
@@ -774,7 +774,7 @@
 ; vmxb, vmxh, vmxf, vmxg
 (define_insn "smax3"
   [(set (match_operand:VI  0 "register_operand" "=v")
-   (smax:VI (match_operand:VI 1 "register_operand"  "v")
+   (smax:VI (match_operand:VI 1 "register_operand" "%v")
 (match_operand:VI 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vmx\t%v0,%v1,%v2"
@@ -783,7 +783,7 @@
 ; vmnlb, vmnlh, vmnlf, vmnlg
 (define_insn "umin3"
   [(set (match_operand:VI  0 "register_operand" "=v")
-   (umin:VI (match_operand:VI 1 "register_operand"  "v")
+   (umin:VI (match_operand:VI 1 "register_operand" "%v")
 (match_operand:VI 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vmnl\t%v0,%v1,%v2"
@@ -792,7 +792,7 @@
 ; vmxlb, vmxlh, vmxlf, vmxlg
 (define_insn "umax3"
   [(set (match_operand:VI  0 "register_operand" "=v")
-   (umax:VI (match_operand:VI 1 "register_operand"  "v")
+   (umax:VI (match_operand:VI 1 "register_operand" "%v")
 (match_operand:VI 2 "register_operand"  "v")))]
   "TARGET_VX"
   "vmxl\t%v0,%v1,%v2"
@@ -800,8 +800,8 @@
 
 ; vmeb, vmeh, vmef
 (define_insn "vec_widen_smult_even_"
-  [(set (match_operand:0 "register_operand" 
"=v")
-   (unspec: [(match_operand:VI_QHS 1 "register_operand"  "v")
+  [(set (match_operand: 0 "register_operand" "=v")
+   (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v")
  (match_operand:VI_QHS 2 "register_operand"  "v")]
 UNSPEC_VEC_SMULT_EVEN))]
   "TARGET_VX"
@@ -811,7 +811,7 @@
 ; vmleb, vmleh, vmlef
 (define_insn "vec_widen_umult_even_"
   [(set (match_operand: 0 "register_operand" "=v")
-   (unspec: [(match_operand:VI_QHS 1 "register_operand"  "v")
+   (unspec: [(match_operand:VI_QHS 1 "register_operand" "%v")
  (match_operand:VI_QHS 2 "register_operand"  "v")]
 UNSPEC_VEC_UMULT_EVEN))]
   "TARGET_VX"
@@ -821,7 +821,7 @@
 ; vmob, vmoh, vmof
 (define_insn "vec_widen_smult_odd_"
   [(set (match_operand: 0 "register_operand" "=v")
-   

[PATCH 1/9] S/390: Add IBM z13 pipeline description

2016-02-17 Thread Andreas Krebbel
This patch adds proper support for the -mtune=z13 option by adding a
z13 pipeline description.  As started with zEC12 we mostly make use of
the sched reorder hooks to implement a grouping strategy.  However,
this time we also keep an eye on the instruction mix provided in the
out of order window to allow the hardware to exploit the different
units.

gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/2827.md: Rename ooo_* insn attributes to zEC12_*.
* config/s390/2964.md: New file.
* config/s390/s390.c (s390_get_sched_attrmask): Use the right set
of insn grouping attributes depending on the CPU level.
(s390_get_unit_mask): New function.
(s390_sched_score): Remove the OOO from the scheduling macros.
Add loop to calculate a score for the instruction mix.
(s390_sched_reorder): Likewise plus improve debug output.
(s390_sched_variable_issue): Rename macros as above.  Calculate
the unit distances after actually scheduling an insn.  Improve
debug output.
(s390_sched_init): Clear last_scheduled_unit_distance array.
* config/s390/s390.md: Include 2964.md.
---
 gcc/config/s390/2827.md |   9 +-
 gcc/config/s390/2964.md | 232 +++
 gcc/config/s390/s390.c  | 259 
 gcc/config/s390/s390.md |   3 +
 4 files changed, 435 insertions(+), 68 deletions(-)
 create mode 100644 gcc/config/s390/2964.md

diff --git a/gcc/config/s390/2827.md b/gcc/config/s390/2827.md
index 7baf990..21a5ee9 100644
--- a/gcc/config/s390/2827.md
+++ b/gcc/config/s390/2827.md
@@ -18,20 +18,19 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-
-(define_attr "ooo_cracked" ""
+(define_attr "zEC12_cracked" ""
   (cond [(eq_attr "mnemonic" 
"cgdbr,clfxtr,cdgtr,celfbr,cxgtr,clfebr,clc,lngfr,cs,cfxbr,xc,clfdbr,basr,ex,cxlgtr,clfdtr,srdl,lpgfr,cdlgbr,cgxtr,cxlftr,nc,cxftr,cdfbr,clfxbr,cdftr,clgxbr,cgdtr,cxlgbr,mvc,clgdtr,cegbr,cfebr,cdlftr,sldl,cdlgtr,csg,chhsi,clgebr,cxgbr,cxfbr,cdlfbr,cgebr,lzxr,oc,cdgbr,brasl,cgxbr,cxlfbr,clgxtr,exrl,cfdbr,celgbr,clgdbr,lxr,cpsdr,lcgfr,bras,srda,cefbr")
 (const_int 1)]
 (const_int 0)))
 
-(define_attr "ooo_expanded" ""
+(define_attr "zEC12_expanded" ""
   (cond [(eq_attr "mnemonic" 
"dlr,dsgr,d,dsgf,stam,dsgfr,dlgr,dsg,cds,dr,stm,mvc,dl,cdsg,stmy,dlg,stmg,lam") 
(const_int 1)]
 (const_int 0)))
 
-(define_attr "ooo_endgroup" ""
+(define_attr "zEC12_endgroup" ""
   (cond [(eq_attr "mnemonic" "ipm") (const_int 1)]
 (const_int 0)))
 
-(define_attr "ooo_groupalone" ""
+(define_attr "zEC12_groupalone" ""
   (cond [(eq_attr "mnemonic" 
"lnxbr,madb,ltxtr,clc,axtr,msebr,slbgr,xc,alcr,lpxbr,slbr,maebr,mlg,mfy,lxdtr,maeb,lxeb,nc,mxtr,sxtr,dxbr,alc,msdbr,ltxbr,lxdb,madbr,lxdbr,lxebr,mvc,m,mseb,mlr,mlgr,slb,tcxb,msdb,sqxbr,alcgr,oc,flogr,alcg,mxbr,dxtr,axbr,mr,sxbr,slbg,ml,lcxbr,bcr_flush")
 (const_int 1)]
 (const_int 0)))
 
diff --git a/gcc/config/s390/2964.md b/gcc/config/s390/2964.md
new file mode 100644
index 000..d2211e1
--- /dev/null
+++ b/gcc/config/s390/2964.md
@@ -0,0 +1,232 @@
+;; Scheduling description for z13.
+;;   Copyright (C) 2016 Free Software Foundation, Inc.
+;;   Contributed by Andreas Krebbel (andreas.kreb...@de.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+; generator options: vector_ecycs=12 cracked_ecycs=6 scale_ecycs=5
+
+(define_attr "z13_cracked" ""
+  (cond [(eq_attr "mnemonic" "celgbr,vscef,vsceg,exrl,clfebr,cefbr,chhsi,\
+vgef,vgeg,cdlftr,lcgfr,cfdbr,cgdbr,lzxr,cfxbr,rnsbg,cgdtr,cegbr,rxsbg,ex,\
+cgxtr,clfxtr,cdlgtr,brasl,efpc,cfebr,tbeginc,celfbr,clgxbr,vsteb,vsteh,\
+clfdtr,cdfbr,lngfr,clgebr,stpq,cs,lpgfr,cdlgbr,lpq,cdgtr,d,cgxbr,cdftr,\
+rosbg,clgdbr,cdgbr,bras,tbegin,clfdbr,cdlfbr,cgebr,clfxbr,lxr,csy,csg,clgdtr,\
+clgxtr") (const_int 1)]
+(const_int 0)))
+
+(define_attr "z13_expanded" ""
+  (cond [(eq_attr "mnemonic" "cxlftr,cdsg,cdsy,stam,lam,dsgf,lmg,cxlgtr,\
+dl,cxftr,sldl,dsg,cxlfbr,cxgtr,stmg,stmy,stm,lm,cds,lmy,cxfbr,cxlgbr,srda,\
+srdl,cxgbr,dlg") (const_int 1)]
+(const_int 0)))
+
+(define_attr "z13_groupalone" ""
+  (cond [(eq_attr "mnemonic" "mvc,dxbr,lxebr,axtr,cxtr,alcr,lxdb,lxeb,mxtr,\

[PATCH 0/9] S/390: z13 pipeline description, stpcpy + bugfixes

2016-02-17 Thread Andreas Krebbel
I've had this patchset in my local tree for quite a while now.
Posting it was so far prevented by some internal process hurdles.  I'm
aware it isn't stage 4 material.  I nevertheless would like to commit
this since:

* It is z13 only and z13 support was new in GCC 6 anyway.  The risk to
  cause regressions for other cpu levels is small (hopefully).

* It is required to get rid of some nasty performance regressions
  which can be observed with -march=z13 otherwise.

Any objections?

Bye,

-Andreas-

Andreas Krebbel (9):
  S/390: Add IBM z13 pipeline description
  S/390: z13 lcbb fix address operand.
  S/390: z13 inline stpcpy implementation.
  S/390: Adjust movstr-1.c testcase to work with the z13 stpcpy
implementation.
  S/390: z13 fix mode in vcond expansion
  S/390: Add vec_sub_u128 to vecintrin.h
  S/390: z13 Change predicates of 128 bit add sub.
  S/390: Add single element vector types to iterators.
  S/390: z13 Add missing commutative operand markers.

 gcc/config/s390/2827.md|   9 +-
 gcc/config/s390/2964.md|  64 
 gcc/config/s390/s390-protos.h  |   1 +
 gcc/config/s390/s390.c | 381 +
 gcc/config/s390/s390.md|  19 +-
 gcc/config/s390/vecintrin.h|   1 +
 gcc/config/s390/vector.md  |  60 ++--
 gcc/config/s390/vx-builtins.md |  56 +--
 gcc/testsuite/gcc.target/s390/md/movstr-1.c|   2 +-
 gcc/testsuite/gcc.target/s390/md/movstr-2.c|  98 ++
 gcc/testsuite/gcc.target/s390/vector/int128-1.c|  47 +++
 gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c |  23 ++
 12 files changed, 628 insertions(+), 133 deletions(-)
 create mode 100644 gcc/config/s390/2964.md
 create mode 100644 gcc/testsuite/gcc.target/s390/md/movstr-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/int128-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c

-- 
1.9.1



[PATCH 7/9] S/390: z13 Change predicates of 128 bit add sub.

2016-02-17 Thread Andreas Krebbel
So far usage of 128 bit add/sub instruction was rejected if the second
operand was a constant because the predicate rejected this.

gcc/testsuite/ChangeLog:

2016-02-17  Andreas Krebbel  

* gcc.target/s390/vector/int128-1.c: New test.

gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/vector.md ("add3", "sub3"):
Change the predicate of op2 from nonimmediate to general and let
reload fix it if necessary.
---
 gcc/config/s390/vector.md   |  4 +--
 gcc/testsuite/gcc.target/s390/vector/int128-1.c | 47 +
 2 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/int128-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 2302a8f..cdb9ba6 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -454,7 +454,7 @@
 (define_insn "add3"
   [(set (match_operand:VIT   0 "nonimmediate_operand" "=v")
(plus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
- (match_operand:VIT 2 "nonimmediate_operand"  "v")))]
+ (match_operand:VIT 2 "general_operand"  "v")))]
   "TARGET_VX"
   "va\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
@@ -463,7 +463,7 @@
 (define_insn "sub3"
   [(set (match_operand:VIT0 "nonimmediate_operand" "=v")
(minus:VIT (match_operand:VIT 1 "nonimmediate_operand"  "v")
-  (match_operand:VIT 2 "nonimmediate_operand"  "v")))]
+  (match_operand:VIT 2 "general_operand"  "v")))]
   "TARGET_VX"
   "vs\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
diff --git a/gcc/testsuite/gcc.target/s390/vector/int128-1.c 
b/gcc/testsuite/gcc.target/s390/vector/int128-1.c
new file mode 100644
index 000..b4a16b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/int128-1.c
@@ -0,0 +1,47 @@
+/* Check that vaq/vsq are used for int128 operations.  */
+
+/* { dg-do compile { target { lp64 } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+
+const __int128 c = (__int128)0x0123456789abcd55 + ((__int128)7 << 64);
+
+
+__int128
+addreg(__int128 a, __int128 b)
+{
+  return a + b;
+}
+
+__int128
+addconst(__int128 a)
+{
+  return a + c;
+}
+
+__int128
+addmem(__int128 *a, __int128_t *b)
+{
+  return *a + *b;
+}
+
+__int128
+subreg(__int128 a, __int128 b)
+{
+  return a - b;
+}
+
+__int128
+subconst(__int128 a)
+{
+  return a - c; /* This becomes vaq as well.  */
+}
+
+__int128
+submem(__int128 *a, __int128_t *b)
+{
+  return *a - *b;
+}
+
+/* { dg-final { scan-assembler-times "vaq" 4 } } */
+/* { dg-final { scan-assembler-times "vsq" 2 } } */
-- 
1.9.1



[PATCH 5/9] S/390: z13 fix mode in vcond expansion

2016-02-17 Thread Andreas Krebbel
For floating point vector compares the target mode is an integer mode
which accidentally was used as register mode when forcing the compare
operands into regs.

gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/s390.c (s390_expand_vcond): Use the compare operand
mode.

gcc/testsuite/ChangeLog:

2016-02-17  Andreas Krebbel  

* gcc.target/s390/vector/vec-vcond-1.c: New test.
---
 gcc/config/s390/s390.c |  4 ++--
 gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c | 23 ++
 2 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index da05a04..cd53b15 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -6329,10 +6329,10 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
  can be handled by the optimization above but not by the
  following code.  Hence, force them into registers here.  */
   if (!REG_P (cmp_op1))
-cmp_op1 = force_reg (target_mode, cmp_op1);
+cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
 
   if (!REG_P (cmp_op2))
-cmp_op2 = force_reg (target_mode, cmp_op2);
+cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
 
   s390_expand_vec_compare (result_target, cond,
   cmp_op1, cmp_op2);
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c 
b/gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c
new file mode 100644
index 000..ec65c6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-vcond-1.c
@@ -0,0 +1,23 @@
+/* A const vector operand is forced into a register in
+   s390_expand_vcond.
+   This testcase once failed because the target mode (v2di) was picked
+   for the reg instead of the mode of the other comparison
+   operand.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+typedef __attribute__((vector_size(16))) long   v2di;
+typedef __attribute__((vector_size(16))) double v2df;
+
+v2di
+foo (v2df a)
+{
+  return a == (v2df){ 0.0, 0.0 };
+}
+
+v2di
+bar (v2df a)
+{
+  return (v2df){ 1.0, 1.0 } == (v2df){ 0.0, 0.0 };
+}
-- 
1.9.1



[PATCH 8/9] S/390: Add single element vector types to iterators.

2016-02-17 Thread Andreas Krebbel
gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/vector.md (VI, VI_QHS): Add single element vector
types to mode iterators.
(vec_double): ... and mode attribute.
* config/s390/vx-builtins.md (non_vec_int): Likewise.
---
 gcc/config/s390/vector.md  | 14 +++---
 gcc/config/s390/vx-builtins.md | 12 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index cdb9ba6..3101057 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -43,8 +43,8 @@
 
 ; All integer vector modes supported in a vector register + TImode
 (define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI 
V2SI V4SI V1DI V2DI V1TI TI])
-(define_mode_iterator VI  [V2QI V4QI V8QI V16QI V2HI V4HI V8HI V2SI V4SI V2DI])
-(define_mode_iterator VI_QHS [V4QI V8QI V16QI V4HI V8HI V4SI])
+(define_mode_iterator VI  [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI 
V2SI V4SI V1DI V2DI])
+(define_mode_iterator VI_QHS [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI 
V1SI V2SI V4SI])
 
 (define_mode_iterator V_8   [V1QI])
 (define_mode_iterator V_16  [V2QI  V1HI])
@@ -100,11 +100,11 @@
(V1TF "V1TI")])
 
 ; Vector with doubled element size.
-(define_mode_attr vec_double [(V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI 
"V8HI")
- (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI")
- (V2SI "V1DI") (V4SI "V2DI")
- (V2DI "V1TI")
- (V2SF "V1DF") (V4SF "V2DF")])
+(define_mode_attr vec_double [(V1QI "V1HI") (V2QI "V1HI") (V4QI "V2HI") (V8QI 
"V4HI") (V16QI "V8HI")
+ (V1HI "V1SI") (V2HI "V1SI") (V4HI "V2SI") (V8HI 
"V4SI")
+ (V1SI "V1DI") (V2SI "V1DI") (V4SI "V2DI")
+ (V1DI "V1TI") (V2DI "V1TI")
+ (V1SF "V1DF") (V2SF "V1DF") (V4SF "V2DF")])
 
 ; Vector with half the element size.
 (define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI 
"V16QI")
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 81a2d07..65e683c9 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -28,12 +28,12 @@
 
 ; The element type of the vector with floating point modes translated
 ; to int modes of the same size.
-(define_mode_attr non_vec_int[(V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI")
- (V2HI "HI") (V4HI "HI") (V8HI "HI")
- (V2SI "SI") (V4SI "SI")
- (V2DI "DI")
- (V2SF "SI") (V4SF "SI")
- (V2DF "DI")])
+(define_mode_attr non_vec_int[(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") 
(V16QI "QI")
+ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI")
+ (V1SI "SI") (V2SI "SI") (V4SI "SI")
+ (V1DI "DI") (V2DI "DI")
+ (V1SF "SI") (V2SF "SI") (V4SF "SI")
+ (V1DF "DI") (V2DF "DI")])
 
 ; Condition code modes generated by int comparisons
 (define_mode_iterator VICMP [CCVEQ CCVH CCVHU])
-- 
1.9.1



[PATCH 3/9] S/390: z13 inline stpcpy implementation.

2016-02-17 Thread Andreas Krebbel
A handwritten loop for stpcpy using the new z13 vector instructions
appears to be much faster than the millicoded instruction.  However,
the implementation is much longer and therefore will only be enabled
when optimizing for speed.

gcc/testsuite/ChangeLog:

2016-02-17  Andreas Krebbel  

* gcc.target/s390/md/movstr-2.c: New test.

gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/s390-protos.h: Add s390_expand_vec_movstr prototype.
* config/s390/s390.c (s390_expand_vec_movstr): New function.
* config/s390/s390.md ("movstr"): Call
s390_expand_vec_movstr.
---
 gcc/config/s390/s390-protos.h   |   1 +
 gcc/config/s390/s390.c  | 118 
 gcc/config/s390/s390.md |  12 ++-
 gcc/testsuite/gcc.target/s390/md/movstr-2.c |  98 +++
 4 files changed, 227 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/md/movstr-2.c

diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 09032c9..792eaa7 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -109,6 +109,7 @@ extern bool s390_expand_movmem (rtx, rtx, rtx);
 extern void s390_expand_setmem (rtx, rtx, rtx);
 extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
 extern void s390_expand_vec_strlen (rtx, rtx, rtx);
+extern void s390_expand_vec_movstr (rtx, rtx, rtx);
 extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
 extern void s390_expand_cs_hqi (machine_mode, rtx, rtx, rtx,
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index c2e59f5..da05a04 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -5622,6 +5622,124 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx 
alignment)
 emit_move_insn (target, temp);
 }
 
+void
+s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
+{
+  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+  rtx temp = gen_reg_rtx (Pmode);
+  rtx src_addr = XEXP (src, 0);
+  rtx dst_addr = XEXP (dst, 0);
+  rtx src_addr_reg = gen_reg_rtx (Pmode);
+  rtx dst_addr_reg = gen_reg_rtx (Pmode);
+  rtx offset = gen_reg_rtx (Pmode);
+  rtx vsrc = gen_reg_rtx (V16QImode);
+  rtx vpos = gen_reg_rtx (V16QImode);
+  rtx loadlen = gen_reg_rtx (SImode);
+  rtx gpos_qi = gen_reg_rtx(QImode);
+  rtx gpos = gen_reg_rtx (SImode);
+  rtx done_label = gen_label_rtx ();
+  rtx loop_label = gen_label_rtx ();
+  rtx exit_label = gen_label_rtx ();
+  rtx full_label = gen_label_rtx ();
+
+  /* Perform a quick check for string ending on the first up to 16
+ bytes and exit early if successful.  */
+
+  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
+  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
+  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
+  emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
+  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
+  /* gpos is the byte index if a zero was found and 16 otherwise.
+ So if it is lower than the loaded bytes we have a hit.  */
+  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
+  full_label);
+  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
+
+  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
+ 1, OPTAB_DIRECT);
+  emit_jump (exit_label);
+  emit_barrier ();
+
+  emit_label (full_label);
+  LABEL_NUSES (full_label) = 1;
+
+  /* Calculate `offset' so that src + offset points to the last byte
+ before 16 byte alignment.  */
+
+  /* temp = src_addr & 0xf */
+  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
+ 1, OPTAB_DIRECT);
+
+  /* offset = 0xf - temp */
+  emit_move_insn (offset, GEN_INT (15));
+  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
+ 1, OPTAB_DIRECT);
+
+  /* Store `offset' bytes in the destination string.  The quick check
+ has loaded at least `offset' bytes into vsrc.  */
+
+  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
+
+  /* Advance to the next byte to be loaded.  */
+  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
+ 1, OPTAB_DIRECT);
+
+  /* Make sure the addresses are single regs which can be used as a
+ base.  */
+  emit_move_insn (src_addr_reg, src_addr);
+  emit_move_insn (dst_addr_reg, dst_addr);
+
+  /* MAIN LOOP */
+
+  emit_label (loop_label);
+  LABEL_NUSES (loop_label) = 1;
+
+  emit_move_insn (vsrc,
+ gen_rtx_MEM (V16QImode,
+  gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
+
+  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
+ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
+  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
+   REG_BR_PROB, 

[PATCH 6/9] S/390: Add vec_sub_u128 to vecintrin.h

2016-02-17 Thread Andreas Krebbel
This adds a missing macro to the vecintrin.h header file.

gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/vecintrin.h (vec_sub_u128): Define missing macro.
---
 gcc/config/s390/vecintrin.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h
index b9742ec..ab82e7a 100644
--- a/gcc/config/s390/vecintrin.h
+++ b/gcc/config/s390/vecintrin.h
@@ -80,6 +80,7 @@ __lcbb(const void *ptr, int bndry)
 #define vec_checksum __builtin_s390_vcksm
 #define vec_gfmsum_128 __builtin_s390_vgfmg
 #define vec_gfmsum_accum_128 __builtin_s390_vgfmag
+#define vec_sub_u128 __builtin_s390_vsq
 #define vec_subc_u128 __builtin_s390_vscbiq
 #define vec_sube_u128 __builtin_s390_vsbiq
 #define vec_subec_u128 __builtin_s390_vsbcbiq
-- 
1.9.1



[PATCH 4/9] S/390: Adjust movstr-1.c testcase to work with the z13 stpcpy implementation.

2016-02-17 Thread Andreas Krebbel
2016-02-17  Andreas Krebbel  

* gcc.target/s390/md/movstr-1.c: Allow also the z13 strings
instruction pattern name to prevent the testcase from failing with
-march=z13.
---
 gcc/testsuite/gcc.target/s390/md/movstr-1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/s390/md/movstr-1.c 
b/gcc/testsuite/gcc.target/s390/md/movstr-1.c
index 7da749b..da98415 100644
--- a/gcc/testsuite/gcc.target/s390/md/movstr-1.c
+++ b/gcc/testsuite/gcc.target/s390/md/movstr-1.c
@@ -9,7 +9,7 @@ void test(char *dest, const char *src)
   __builtin_stpcpy (dest, src);
 }
 
-/* { dg-final { scan-assembler-times {{[*]movstr}} 1 } } */
+/* { dg-final { scan-assembler-times {{[*]movstr}|{vec_vfenesv16qi}} 1 } } */
 
 #define LEN 200
 char buf[LEN];
-- 
1.9.1



[PATCH 2/9] S/390: z13 lcbb fix address operand.

2016-02-17 Thread Andreas Krebbel
gcc/ChangeLog:

2016-02-17  Andreas Krebbel  

* config/s390/s390.md: Add missing output modifier for operand 1
to print it as address properly.
---
 gcc/config/s390/s390.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9d76e61..55ae705 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -10913,11 +10913,11 @@
 
 (define_insn "lcbb"
   [(set (match_operand:SI 0 "register_operand"  "=d")
-   (unspec:SI [(match_operand:SI 1 "address_operand" "ZQZR")
+   (unspec:SI [(match_operand 1 "address_operand" "ZQZR")
(match_operand:SI 2 "immediate_operand"  "C")] UNSPEC_LCBB))
(clobber (reg:CC CC_REGNUM))]
   "TARGET_Z13"
-  "lcbb\t%0,%1,%b2"
+  "lcbb\t%0,%a1,%b2"
   [(set_attr "op_type" "VRX")])
 
 ; Handle -fsplit-stack.
-- 
1.9.1



[PATCH] Fix Cilk+ #pragma cilk grainsize preprocessing (PR c++/69826)

2016-02-17 Thread Jakub Jelinek
Hi!

The following testcase works unless -save-temps or ccache is used
(or manually performing -E and compilation separately).  The problem
is that #pragma cilk grainsize is supposed to have macro expansion
(except for the grainsize keyword), but we weren't enabling that for -E.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2016-02-17  Jakub Jelinek  

PR c++/69826
* c-pragma.c (c_pp_lookup_pragma): Handle PRAGMA_CILK_GRAINSIZE.
(init_pragma): Register PRAGMA_CILK_GRAINSIZE even for
flag_preprocess_only.

* c-c++-common/cilk-plus/CK/pr69826-1.c: New test.
* c-c++-common/cilk-plus/CK/pr69826-2.c: New test.

--- gcc/c-family/c-pragma.c.jj  2016-02-01 23:35:05.0 +0100
+++ gcc/c-family/c-pragma.c 2016-02-17 14:48:37.489399494 +0100
@@ -1336,6 +1336,13 @@ c_pp_lookup_pragma (unsigned int id, con
   return;
 }
 
+  if (id == PRAGMA_CILK_GRAINSIZE)
+{
+  *space = "cilk";
+  *name = "grainsize";
+  return;
+}
+
   if (id >= PRAGMA_FIRST_EXTERNAL
   && (id < PRAGMA_FIRST_EXTERNAL + registered_pp_pragmas.length ()))
 {
@@ -1523,7 +1530,7 @@ init_pragma (void)
 cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, 
false,
  false);
 
-  if (flag_cilkplus && !flag_preprocess_only)
+  if (flag_cilkplus)
 cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
  PRAGMA_CILK_GRAINSIZE, true, false);
 
--- gcc/testsuite/c-c++-common/cilk-plus/CK/pr69826-1.c.jj  2016-02-17 
15:09:10.685152534 +0100
+++ gcc/testsuite/c-c++-common/cilk-plus/CK/pr69826-1.c 2016-02-17 
15:11:39.518070410 +0100
@@ -0,0 +1,25 @@
+/* { dg-do run { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#define GRAINSIZE 2
+
+int
+main ()
+{
+  int a[64];
+  #pragma cilk grainsize=GRAINSIZE
+  _Cilk_for (int i = 0; i < 64; i++)
+a[i] = 0;
+  #pragma cilk grainsize =GRAINSIZE
+  _Cilk_for (int i = 0; i < 64; i++)
+a[i]++;
+  #pragma cilk grainsize = GRAINSIZE
+  _Cilk_for (int i = 0; i < 64; i++)
+a[i]++;
+  for (int i = 0; i < 64; i++)
+if (a[i] != 2)
+  __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/c-c++-common/cilk-plus/CK/pr69826-2.c.jj  2016-02-17 
15:11:57.073824810 +0100
+++ gcc/testsuite/c-c++-common/cilk-plus/CK/pr69826-2.c 2016-02-17 
15:12:12.409610268 +0100
@@ -0,0 +1,6 @@
+/* { dg-do run { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-fcilkplus -save-temps" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+/* { dg-additional-options "-lcilkrts" { target { i?86-*-* x86_64-*-* } } } */
+
+#include "pr69826-1.c"

Jakub


[PATCH] Fix a C++ -Wnonnull-compare regression (PR c++/69850)

2016-02-17 Thread Jakub Jelinek
Hi!

As -Wnonnull -> -Wnonnull-compare warning has been moved from FE
to after going into SSA form, we have a problem because
delete ptr;
in C++ may introduce a comparison on its own, and on that artificial
comparison can complain about comparison of nonnull argument with NULL.

The following patch fixes this by setting TREE_NO_WARNING on it, making sure
it propagates up to the warn_nonnull_compare pass and not warning if it is
set.

While for "this" (after stripping nops) perhaps already the C++ FE could
optimize and assume this is always non-NULL (i.e. not generate the
comparison at all), in other cases, even if the arg is nonnull_arg_p, it
can't assume that, for the exact same reasons why the warning had to be
moved into the middle-end.  Because we can have:
__attribute__((nonnull)) void
foo (S *p, S *q, int a)
{
  if (a == 1)
delete p; // here if (p != NULL) could be optimized away, but the
  // FE doesn't know that
  else if (a == 2)
{
  bar (); // q could have changed here.
  delete q; // so, even though nonnull_arg_p (q), we have to if (q != NULL)
}
  else if (a == 3)
{
  p = baz (); // p value has changed, thus we need to test
  delete p;   // if (p != NULL) here.
}
}
In any case, as the source doesn't contain comparison of p != NULL in the a
== 1 case, we don't want to warn.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-02-17  Jakub Jelinek  

PR c++/69850
* gimplify.c (gimplify_cond_expr): Call gimple_set_no_warning
on the cond_stmt from TREE_NO_WARNING on COND_EXPR_COND.
* gimple-ssa-nonnull-compare.c (do_warn_nonnull_compare): Don't
warn on gimple_no_warning_p statements.

* init.c (build_delete): Set TREE_NO_WARNING on ifexp.

* g++.dg/warn/Wnonnull-compare-1.C: New test.

--- gcc/gimplify.c.jj   2016-02-16 21:42:57.0 +0100
+++ gcc/gimplify.c  2016-02-17 14:01:45.789728763 +0100
@@ -3219,6 +3219,7 @@ gimplify_cond_expr (tree *expr_p, gimple
 );
   cond_stmt = gimple_build_cond (pred_code, arm1, arm2, label_true,
 label_false);
+  gimple_set_no_warning (cond_stmt, TREE_NO_WARNING (COND_EXPR_COND (expr)));
   gimplify_seq_add_stmt (&seq, cond_stmt);
   gimple_stmt_iterator gsi = gsi_last (seq);
   maybe_fold_stmt (&gsi);
--- gcc/gimple-ssa-nonnull-compare.c.jj 2016-02-16 21:46:02.0 +0100
+++ gcc/gimple-ssa-nonnull-compare.c2016-02-17 13:51:52.933996279 +0100
@@ -96,7 +96,8 @@ do_warn_nonnull_compare (function *fun,
  }
   if (op
  && (POINTER_TYPE_P (TREE_TYPE (arg))
- ? integer_zerop (op) : integer_minus_onep (op)))
+ ? integer_zerop (op) : integer_minus_onep (op))
+ && !gimple_no_warning_p (stmt))
warning_at (loc, OPT_Wnonnull_compare,
"nonnull argument %qD compared to NULL", arg);
 }
--- gcc/cp/init.c.jj2016-02-15 23:04:42.0 +0100
+++ gcc/cp/init.c   2016-02-17 13:54:32.597769729 +0100
@@ -4525,6 +4525,10 @@ build_delete (tree otype, tree addr, spe
complain));
  if (ifexp == error_mark_node)
return error_mark_node;
+ /* This is a compiler generated comparison, don't emit
+e.g. -Wnonnull-compare warning for it.  */
+ else if (TREE_CODE (ifexp) == NE_EXPR)
+   TREE_NO_WARNING (ifexp) = 1;
}
 
   if (ifexp != integer_one_node)
--- gcc/testsuite/g++.dg/warn/Wnonnull-compare-1.C.jj   2016-02-17 
14:04:51.644136979 +0100
+++ gcc/testsuite/g++.dg/warn/Wnonnull-compare-1.C  2016-02-17 
14:04:24.0 +0100
@@ -0,0 +1,9 @@
+// PR c++/69850
+// { dg-do compile }
+// { dg-options "-Wall" }
+
+struct C
+{
+  ~C () { delete this; }   // { dg-bogus "nonnull argument" }
+};
+C c;

Jakub


Re: [6 Regression] Usage of uninitialized pointer io/list_read.c (

2016-02-17 Thread Jerry DeLisle
On 02/16/2016 05:37 PM, Jerry DeLisle wrote:
> See patch to fix this below.
> 

Committed on trunk, r233500 after regression testing, -fsanitize=address
testing, and valgrind testing.

Jerry


[PATCH] Fix up avx512* regressions caused by the cse.c one-liner change (PR target/69671)

2016-02-17 Thread Jakub Jelinek
Hi!

As I wrote in the PR, fwprop is able to forward CONST0_RTX back into
instructions even if CSE optimized them, but the problem in that case is
that for vector_move_operand "0C" operands if they appear inside of
(vec_select ... (parallel [(const_int 0) ... ])) the result is also
simplified, so one gets instead another CONST0_RTX (in the mode of
the VEC_SELECT).  Because the patterns expect a vec_select and "C" operand
inside of it, it is therefore not matched; it may be attached as a REG_EQUAL
note.  I went through other vector_move_operand "0C" and "0C,0" operands
and I don't think they suffer from similar problem, if fwprop or cprop etc.
attempts to propagate a constant into them, it shouldn't be possible it will
be simplified into something different.

Anyway, the fix IMHO is to just duplicate the affected 8 define_insns
with the simplification applied.  IMHO once we know it is {z}, it is worth
keeping it as {z}; there is no benefit in allowing the RA to use a "0"
operand instead.

Bootstrapped/regtested on x86_64-linux and i686-linux, on both fixes
the testcases that started failing with r233133, ok for trunk?

2016-02-17  Jakub Jelinek  

PR target/69671
* config/i386/sse.md (*floatv2div2sf2_mask_1,
*avx512vl_v2div2qi2_mask_1, *avx512vl_v4qi2_mask_1,
*avx512vl_v8qi2_mask_1, *avx512vl_v4hi2_mask_1,
*avx512vl_v2div2hi2_mask_1, *avx512vl_v2div2si2_mask_1,
*avx512f_v8div16qi2_mask_1): New insns.

--- gcc/config/i386/sse.md.jj   2016-02-15 22:22:46.0 +0100
+++ gcc/config/i386/sse.md  2016-02-17 12:25:50.528896579 +0100
@@ -4962,6 +4962,21 @@ (define_insn "floatv2div2sf
(set_attr "prefix" "evex")
(set_attr "mode" "V4SF")])
 
+(define_insn "*floatv2div2sf2_mask_1"
+  [(set (match_operand:V4SF 0 "register_operand" "=v")
+(vec_concat:V4SF
+   (vec_merge:V2SF
+   (any_float:V2SF (match_operand:V2DI 1
+ "nonimmediate_operand" "vm"))
+   (const_vector:V2SF [(const_int 0) (const_int 0)])
+   (match_operand:QI 2 "register_operand" "Yk"))
+   (const_vector:V2SF [(const_int 0) (const_int 0)])))]
+  "TARGET_AVX512DQ && TARGET_AVX512VL"
+  "vcvtqq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V4SF")])
+
 (define_insn "ufloat2"
   [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
(unsigned_float:VF2_512_256VL
@@ -9150,6 +9165,27 @@ (define_insn "avx512vl_v2div2qi2_m
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
 
+(define_insn "*avx512vl_v2div2qi2_mask_1"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+(vec_concat:V16QI
+  (vec_merge:V2QI
+   (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+   (const_vector:V2QI [(const_int 0) (const_int 0)])
+   (match_operand:QI 2 "register_operand" "Yk"))
+  (const_vector:V14QI [(const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)])))]
+  "TARGET_AVX512VL"
+  "vpmovqb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "avx512vl_v2div2qi2_mask_store"
   [(set (match_operand:V16QI 0 "memory_operand" "=m")
 (vec_concat:V16QI
@@ -9219,6 +9255,27 @@ (define_insn "avx512vl_v4qi2
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
 
+(define_insn "*avx512vl_v4qi2_mask_1"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+(vec_concat:V16QI
+  (vec_merge:V4QI
+   (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+   (const_vector:V4QI [(const_int 0) (const_int 0)
+   (const_int 0) (const_int 0)])
+   (match_operand:QI 2 "register_operand" "Yk"))
+  (const_vector:V12QI [(const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)
+  (const_int 0) (const_int 0)])))]
+  "TARGET_AVX512VL"
+  "vpmov\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "avx512vl_v4qi2_mask_store"
   [(set (match_operand:V16QI 0 "memory_operand" "=m")
 (vec_concat:V16QI
@@ -9289,6 +9346,27 @@ (define_insn "avx512vl_v8qi2
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
 
+(define_insn "*avx512vl_v8qi2_mask_1"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+

Re: Partial Offloading (was: [hsa merge 07/10] IPA-HSA pass)

2016-02-17 Thread Ilya Verbin
On Thu, Jan 28, 2016 at 12:36:19 +0100, Thomas Schwinge wrote:
> I made an attempt to capture the recent discussion (plus my own
> ideas/understanding) in this new section:
> .  Please
> change/extend, as required.

Thanks for summarizing this.


I'm not very happy with how -foffload=disable works in GCC 6, here is a testcase:

int main ()
{
  int x = 10;
  #pragma omp target data map (from: x)
#pragma omp target map (alloc: x)
  x = 20;
  if (x != 10 && x != 20)
__builtin_abort ();
}

On the system with non-shared accelerator it will abort, because "#pragma omp
target data" behaves like offloading is enabled, but "#pragma omp target" runs
on the host.  As a result, at the end of the *target data* region, it tries to
receive x from target and receives 0, or crashes.

We can forbid -foffload=disable option, but I think it's very useful, e.g. for
comparing performance of host vs. accelerator using the same compiler, etc.
Or if the system contains 2 different accelerators, someone might want to
compile only for the first, but libgomp will load 2 plugins, and the program
will crash (instead of doing fallback) if it will try to use the second device.

So, maybe we still need something like this patch?
https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01033.html

  -- Ilya


Re: [RFC] [P2] [PR tree-optimization/33562] Lowering more complex assignments.

2016-02-17 Thread Jeff Law

On 02/17/2016 07:13 AM, Richard Biener wrote:

-  /* Continue walking until we reach a kill.  */
-  while (!stmt_kills_ref_p (temp, ref));
+  /* Continue walking until we reach a full kill as a single statement
+ or there are no more live bytes.  */
+  while (!stmt_kills_ref_p (temp, ref)
+&& !(live_bytes && bitmap_empty_p (live_bytes)));


Just a short quick comment - the above means you only handle partial stores
with no intervening uses.  You don't handle, say

struct S { struct R { int x; int y; } r; int z; } s;

  s = { {1, 2}, 3 };
  s.r.x = 1;
  s.r.y = 2;
  struct R r = s.r;
  s.z = 3;

where s = { {1, 2}, 3} is still dead.
Right.  But handling that has never been part of DSE's design goals. 
Once there's a use, DSE has always given up.


Having said that...



That is, you don't make use of the live_bytes in the ref_maybe_used_by_stmt_p
check (you can skip uses of only dead bytes).

Not sure if it makes a difference in practice (compared to the cost it
would take).
Not sure either.  It doesn't appear that it would be hard to experiment 
with that to see if it's worth the effort.  My gut feeling is we're not 
going to see this often, if at all, in practice.




Rather than building ao_refs in clear_bytes_written_by just use
get_ref_base_and_extent
directly.
Easy enough to do, but ISTM if we use get_ref_base_and_extent in 
clear_bytes_written_by, then the other blob of similar code in 
tree-ssa-dse should be handled in the same way.  ie, the code you see in 
clear_bytes_written_by is almost a direct copy of code already existing 
in tree-ssa-dse.c (hence my feeling that there's some refactoring of 
that code that we want to do).






You don't handle stuff like

  s[i] = { 1, 2 };
  s[i].x = 1;
  s[i].y = 1;

either btw.

Correct I believe.

IIRC (I think I looked at this during debugging at some point), the 
ao_ref->max_size field will cover the entire array for this kind of 
situation because we don't know which element in the array we're hitting 
(or -1 if we don't know the array's size).  I don't see a reasonable way 
to handle it with an ao_ref style interface unless the variable parts of 
the address computation are all rolled into the ao_ref->base field.


I did look for cases where the initial store was to a varying location 
and thus max_size covered the entire array with killing stores that 
eventually covered the entire array (but with each individual killing 
store having size == max_size) -- the situation never came up in the 
codes I looked at (gcc & its runtime libraries of course).


Jeff


Re: [PATCH] Fix driver handling of multiple -ftree-parallelize-loops= options (PR driver/69805)

2016-02-17 Thread Sandra Loosemore

On 02/17/2016 12:14 AM, Tom de Vries wrote:


Here's the documentation entry for the gt spec function (I forgot to add
it when introducing the function), using the new semantics.

Copy-pasting from the resulting .info viewed in emacs for a
human-readable version:
...
  'gt'
   The 'gt' (greater than) function takes one or more arguments.
   It returns either NULL or the empty string.  If it has one
   argument, it returns NULL.  If it has two arguments, it
   compares them: it returns the empty string if the first
   argument is greater than the second argument, otherwise it
   returns NULL.  If it has more than two arguments, it behaves
   as if only the last two arguments were passed.  It can be used
   f.i. as 'S' in a spec directive %{'S':'X'}: if 'S' is NULL,
   the empty string is substituted, and if 'S' is the empty
   string, 'X' is substituted.

%:gt(%{fsome-option-value=*:%*} 1)
...

OK for stage4 trunk?


I'm not an expert on spec strings  but from a user perspective, what 
is the difference between "NULL" and "the empty string"?  The other spec 
escapes are documented in terms of pattern substitutions at the point 
where the escape appears in the spec string.


-Sandra



[PATCH][AArch64][v2] Skip gcc.target/aarch64/assembler_arch_1.c if assembler does not support it

2016-02-17 Thread Kyrill Tkachov

Hi all,

I've thought about this check a bit more and I think we can compactly 
auto-generate checks
for any aarch64 architecture extension support in the assembler.
This is done in a similar way we autogenerate the arm_arch_*_ok checks for arm.

So in this revision we autogenerate aarch64_asm_<ext>_ok checks for every 
architecture extension
using some of the expect machinery. This should make this approach a bit more 
general to handle
checks for any .arch_extension argument without much extra cost.

This still assumes that the assembler supports the .arch_extension pseudo-op, 
the effective
target check will fail if it doesn't. This is what we want for this testcase.

Is this patch ok instead of 
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01052.html ?

Thanks,
Kyrill

2016-02-17  Kyrylo Tkachov  

* lib/target-supports.exp: Define aarch64_asm_FUNC_ok checks
for fp, simd, crypto, crc, lse.
* doc/sourcebuild.texi (AArch64-specific attributes): Document the
above.
* gcc.target/aarch64/assembler_arch_1.c: Add aarch64_asm_lse_ok
effective target check.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 6d548aad7aa24c59b40ec13d9c99733d94ec0aa6..19fd938afff9bb480e2262d07ce5c8ff9ca167c7 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1606,6 +1606,10 @@ ARM target prefers @code{LDRD} and @code{STRD} instructions over
 @subsubsection AArch64-specific attributes
 
 @table @code
+@item aarch64_asm_<ext>_ok
+AArch64 assembler supports the architecture extension @code{ext} via the
+@code{.arch_extension} pseudo-op.  The values of @code{ext} are defined in
+the file config/aarch64/aarch64-option-extensions.def.
 @item aarch64_tiny
 AArch64 target which generates instruction sequences for tiny memory model.
 @item aarch64_small
diff --git a/gcc/testsuite/gcc.target/aarch64/assembler_arch_1.c b/gcc/testsuite/gcc.target/aarch64/assembler_arch_1.c
index 901e50a178d7a4a443a5ad0abe63f624688db268..5deea5cf0ee9306743bc47bace6f762d0e35ce65 100644
--- a/gcc/testsuite/gcc.target/aarch64/assembler_arch_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/assembler_arch_1.c
@@ -1,4 +1,5 @@
 /* { dg-do assemble } */
+/* { dg-require-effective-target aarch64_asm_lse_ok } */
 /* { dg-options "-march=armv8-a" } */
 
 /* Make sure that the function header in assembly doesn't override
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 66fb1eaf7bd4aa58d23cfc9203e9f27573c7a303..f399f185d25aa5a947b7a17fd6020dc311b18f58 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6719,6 +6719,23 @@ proc check_effective_target_aarch64_tiny { } {
 }
 }
 
+# Create functions to check that the AArch64 assembler supports the
+# various architecture extensions via the .arch_extension pseudo-op.
+
+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse"} {
+eval [string map [list FUNC $aarch64_ext] {
+	proc check_effective_target_aarch64_asm_FUNC_ok { } {
+	  if { [istarget aarch64*-*-*] } {
+		return [check_no_compiler_messages aarch64_lse_assembler object {
+			__asm__ (".arch_extension FUNC");
+		} "-march=armv8-a+FUNC"]
+	  } else {
+		return 0
+	  }
+	}
+}]
+}
+
 proc check_effective_target_aarch64_small { } {
 if { [istarget aarch64*-*-*] } {
 	return [check_no_compiler_messages aarch64_small object {


Re: [PATCH, PR69607] Mark offload symbols as global in lto

2016-02-17 Thread Tom de Vries

On 17/02/16 13:30, Jakub Jelinek wrote:

On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:

Mark offload symbols as global in lto


I'm really not familiar with that part of LTO, so I'm CCing Honza and
Richard here.

2016-02-08  Tom de Vries  

PR lto/69607
* lto-partition.c (promote_offload_tables): New function.
* lto-partition.h (promote_offload_tables):  Declare.


Just one space instead of two after :


* lto.c (do_whole_program_analysis): call promote_offload_tables.


Capital C in Call.



Done.


diff --git a/libgomp/testsuite/libgomp.c/target-37.c 
b/libgomp/testsuite/libgomp.c/target-37.c
new file mode 100644
index 000..1edb21e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-37.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-sources "target-38.c" } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" 
} */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);


Why the C++ stuff in there?  Do you intend to include the testcase
also in libgomp.c++?


No, that's just there because I started both target-37.c and target-38.c 
by copying target-1.c.



If not, it is not needed.


Removed.


Otherwise, the tests LGTM.



Updated patch attached.

Thanks,
- Tom

Mark offload symbols as global in lto

2016-02-17  Tom de Vries  

	PR lto/69607
	* lto-partition.c (promote_offload_tables): New function.
	* lto-partition.h (promote_offload_tables): Declare.
	* lto.c (do_whole_program_analysis): Call promote_offload_tables.

	* testsuite/libgomp.c/target-36.c: New test.
	* testsuite/libgomp.c/target-37.c: New test.
	* testsuite/libgomp.c/target-38.c: New test.

---
 gcc/lto/lto-partition.c | 28 ++
 gcc/lto/lto-partition.h |  1 +
 gcc/lto/lto.c   |  2 +
 libgomp/testsuite/libgomp.c/target-36.c |  4 ++
 libgomp/testsuite/libgomp.c/target-37.c | 94 +
 libgomp/testsuite/libgomp.c/target-38.c | 91 +++
 6 files changed, 220 insertions(+)

diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..56598d4 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "ipa-inline.h"
 #include "lto-partition.h"
+#include "omp-low.h"
 
 vec ltrans_partitions;
 
@@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
 	"Promoting as hidden: %s\n", node->name ());
 }
 
+/* Promote the symbols in the offload tables.  */
+
+void
+promote_offload_tables (void)
+{
+  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+return;
+
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+{
+  tree fn_decl = (*offload_funcs)[i];
+  cgraph_node *node = cgraph_node::get (fn_decl);
+  if (node->externally_visible)
+	continue;
+  promote_symbol (node);
+}
+
+  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+{
+  tree var_decl = (*offload_vars)[i];
+  varpool_node *node = varpool_node::get (var_decl);
+  if (node->externally_visible)
+	continue;
+  promote_symbol (node);
+}
+}
+
 /* Return true if NODE needs named section even if it won't land in the partition
symbol table.
FIXME: we should really not use named sections for inline clones and master
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..1a38126 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -36,6 +36,7 @@ extern vec ltrans_partitions;
 void lto_1_to_1_map (void);
 void lto_max_map (void);
 void lto_balanced_map (int);
+extern void promote_offload_tables (void);
 void lto_promote_cross_file_statics (void);
 void free_ltrans_partitions (void);
 void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..2736c5c 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
  to globals with hidden visibility because they are accessed from multiple
  partitions.  */
   lto_promote_cross_file_statics ();
+  /* Promote all the offload symbols.  */
+  promote_offload_tables ();
   timevar_pop (TV_WHOPR_PARTITIONING);
 
   timevar_stop (TV_PHASE_OPT_GEN);
diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
new file mode 100644
index 000..bafb718
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-36.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+#include "target-1.c"
diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
new file mode 100644
index 000..fe5b8ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-37.c
@@ -0,0 +1,94 @@
+/* { 

Re: [patch] fix docs for C++ warn_unused type attribute

2016-02-17 Thread Jason Merrill

On 02/17/2016 09:49 AM, Jonathan Wakely wrote:

https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html says
that attribute((warn_unused)) is not useful for std::mutex because it
controls a resource. That's incorrect, std::mutex *is* a resource, but
it doesn't control one. In fact declaring std::mutex as a local
variable and then never using it almost certainly is a mistake,
because it won't be locked unless you use it.

A better example of a type that controls a resource would be
std::lock_guard, which you would typically construct and then never
refer to again (because everything interesting happens in the
constructor and destructor).

OK for trunk?


OK.

Jason




Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Marek Polacek
On Wed, Feb 17, 2016 at 04:14:22PM +0100, Jakub Jelinek wrote:
> On Wed, Feb 17, 2016 at 04:11:44PM +0100, Marek Polacek wrote:
> > On Wed, Feb 17, 2016 at 06:08:14PM +0300, Ilya Verbin wrote:
> > > > This line is too long.  But you could have just done
> > > 
> > > My editor shows exactly 80 chars.
> > 
> > The maximum is 79.
> 
> Well, check_GNU_style.sh complains just about one line, and then
> a prototype.
> 
> Lines should not exceed 80 characters.
> 193:+extern tree finish_omp_clauses  (tree, bool, bool = 
> false, bool = false);
> 252:+  error ("linear clause applied to non-integral 
> non-pointer "

Maybe it should be fixed with this then.  Because
 says
"Please keep the length of source lines to 79 characters or less, for maximum
readability in the widest range of environments."

diff --git a/contrib/check_GNU_style.sh b/contrib/check_GNU_style.sh
index ac54ed0..485f4de 100755
--- a/contrib/check_GNU_style.sh
+++ b/contrib/check_GNU_style.sh
@@ -169,16 +169,16 @@ col (){
 
# Remove line number prefix and patch modifier '+'.
# Expand tabs to spaces according to tab positions.
-   # Keep long lines, make short lines empty.  Print the part past 80 chars
-   # in red.
+   # Keep long lines, make short lines empty.  Print the part past 79
+   # chars in red.
cat "$tmp" \
| sed 's/^[0-9]*:+//' \
| expand \
| awk '{ \
-if (length($0) > 80) \
+if (length($0) > 79) \
   printf "%s\033[1;31m%s\033[0m\n", \
- substr($0,1,80), \
- substr($0,81); \
+ substr($0,1,79), \
+ substr($0,80); \
 else \
   print "" \
   }' \
@@ -201,7 +201,7 @@ col (){
 done
 }
 
-col 'Lines should not exceed 80 characters.'
+col 'Lines should not exceed 79 characters.'
 
 g 'Blocks of 8 spaces should be replaced with tabs.' \
 ' {8}'

Marek


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Markus Trippelsdorf
On 2016.02.17 at 18:29 +0300, Ilya Verbin wrote:
> On Wed, Feb 17, 2016 at 16:28:34 +0100, Marek Polacek wrote:
> > On Wed, Feb 17, 2016 at 04:14:22PM +0100, Jakub Jelinek wrote:
> > > On Wed, Feb 17, 2016 at 04:11:44PM +0100, Marek Polacek wrote:
> > > > On Wed, Feb 17, 2016 at 06:08:14PM +0300, Ilya Verbin wrote:
> > > > > > This line is too long.  But you could have just done
> > > > > 
> > > > > My editor shows exactly 80 chars.
> > > > 
> > > > The maximum is 79.
> > > 
> > > Well, check_GNU_style.sh complains just about one line, and then
> > > a prototype.
> > > 
> > > Lines should not exceed 80 characters.
> > > 193:+extern tree finish_omp_clauses  (tree, bool, bool = 
> > > false, bool = false);
> > > 252:+  error ("linear clause applied to non-integral 
> > > non-pointer "
> > 
> > Maybe it should be fixed with this then.  Because
> >  says
> > "Please keep the length of source lines to 79 characters or less, for 
> > maximum
> > readability in the widest range of environments."
> 
> https://gcc.gnu.org/codingconventions.html#Line says 80.

 gcc % cat contrib/clang-format | grep ColumnLimit
ColumnLimit: 80

-- 
Markus


Re: [PATCH] Add debug_function_to_file

2016-02-17 Thread Tom de Vries

On 17/02/16 14:42, Richard Biener wrote:

On Wed, Feb 17, 2016 at 1:41 PM, Tom de Vries  wrote:

>Hi,
>
>once in a while I'm in a gdb debug session debugging cc1, and want to print
>the current function to file.
>
>There's a debug function debug_function that prints a function to stderr,
>and there are methods to redirect output of a command to a file (
>https://sourceware.org/gdb/onlinedocs/gdb/Logging-Output.html  ).
>
>And there's a function dump_function_to_file that takes a FILE* parameter,
>which could be combined with open/close calls in gdb.
>
>But I think a short-hand is easier.
>
>This patch adds a function debug_function_to_file. It can f.i. be called as:
>...
>(gdb) call debug_function_to_file (cfun.decl, "foo.1.txt", 0)
>...
>
>Hmm, now I wonder if the order 'cfun.decl, 0, "foo.1.txt"' would make more
>sense (first two parameters the same as in debug_function).
>
>OK for stage1 trunk if bootstrap and reg-test succeeds?



Bonus for making this a helper in gdbhooks.py instead, using
fopen/fclose and the existing inferior calls.


[ Right, I forgot about those gdb helpers. I haven't used them before, 
it's probably time to start using those.


I've made an init file ~/.gdbgccinit (a stripped version of 
/gcc/.gdbinit):

...
$ cat ~/.gdbgccinit
source /gcc/gdbinit.in
python import sys; sys.path.append('/gcc'); import gdbhooks
...

and a script gdbgcc:
...
$ cat ~/bin/gdbgcc
#!/bin/sh

base=$(basename "$2")

case "$base" in
cc1|cc1plus|f951|lto1)
gdbopt="-x ~/.gdbgccinit"
;;
*)
gdbopt=""
;;
esac

exec gdb $gdbopt "$@"
...

and that allows me to do -wrapper gdbgcc,--args, and import the helpers 
for cc1, and skip them for as.


That seems to work, though I'm curious what approaches other people use. ]

I'll take a look at implementing this in gdbhooks.py (or gdbinit.in 
perhaps?).


Thanks,
- Tom


Re: [PATCH] Add debug_function_graph_to_file

2016-02-17 Thread Tom Tromey
Richard> What does it take to write it in python instead?

IIUC you're asking for it to display the function graph at a given point
in time.  This is easy - you can just dump it to a file and then run the
appropriate visualization tool.

Once upon a time I also wrote Python code to display a function's CFG
from Python, and then update it as you stepped through gcc.  This is on
the "demo" branch here https://github.com/tromey/gdb-gui.  It's probably
bit-rotted by now but it shows the general idea.  This was for a talk at
FOSDEM, the video is online if you want to see what it looks like.

Tom


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Ilya Verbin
On Wed, Feb 17, 2016 at 16:28:34 +0100, Marek Polacek wrote:
> On Wed, Feb 17, 2016 at 04:14:22PM +0100, Jakub Jelinek wrote:
> > On Wed, Feb 17, 2016 at 04:11:44PM +0100, Marek Polacek wrote:
> > > On Wed, Feb 17, 2016 at 06:08:14PM +0300, Ilya Verbin wrote:
> > > > > This line is too long.  But you could have just done
> > > > 
> > > > My editor shows exactly 80 chars.
> > > 
> > > The maximum is 79.
> > 
> > Well, check_GNU_style.sh complains just about one line, and then
> > a prototype.
> > 
> > Lines should not exceed 80 characters.
> > 193:+extern tree finish_omp_clauses  (tree, bool, bool = 
> > false, bool = false);
> > 252:+  error ("linear clause applied to non-integral 
> > non-pointer "
> 
> Maybe it should be fixed with this then.  Because
> <https://www.gnu.org/prep/standards/html_node/Formatting.html> says
> "Please keep the length of source lines to 79 characters or less, for maximum
> readability in the widest range of environments."

https://gcc.gnu.org/codingconventions.html#Line says 80.

  -- Ilya


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 04:11:44PM +0100, Marek Polacek wrote:
> On Wed, Feb 17, 2016 at 06:08:14PM +0300, Ilya Verbin wrote:
> > > This line is too long.  But you could have just done
> > 
> > My editor shows exactly 80 chars.
> 
> The maximum is 79.

Well, check_GNU_style.sh complains just about one line, and then
a prototype.

Lines should not exceed 80 characters.
193:+extern tree finish_omp_clauses  (tree, bool, bool = false, 
bool = false);
252:+  error ("linear clause applied to non-integral 
non-pointer "

(plus testcases, but there it is fine).

Jakub


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Marek Polacek
On Wed, Feb 17, 2016 at 06:08:14PM +0300, Ilya Verbin wrote:
> > This line is too long.  But you could have just done
> 
> My editor shows exactly 80 chars.

The maximum is 79.

Marek


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Ilya Verbin
On Wed, Feb 17, 2016 at 15:46:00 +0100, Jakub Jelinek wrote:
> On Wed, Feb 17, 2016 at 05:32:58PM +0300, Ilya Verbin wrote:
> > + && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))
> > + && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
> > +   {
> > + error_at (OMP_CLAUSE_LOCATION (c),
> > +   "linear clause applied to non-integral, "
> > +   "non-floating, non-pointer variable with type %qT",
> > +   TREE_TYPE (t));
> > + remove = true;
> > + break;
> > +   }
> > +   }
> > + else
> > +   {
> > + if (!INTEGRAL_TYPE_P (TREE_TYPE (t))
> > + && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
> > +   {
> > + error_at (OMP_CLAUSE_LOCATION (c),
> > +   "linear clause applied to non-integral non-pointer "
> 
> This line is too long.  But you could have just done

My editor shows exactly 80 chars.

> > --- a/gcc/cp/semantics.c
> > +++ b/gcc/cp/semantics.c
> 
> > + error ("linear clause applied to non-integral, "
> > +"non-floating, non-pointer variable with %qT type",
> 
> Again too long line, that needs to be wrapped more.

OK, here is 81.

> > +TREE_TYPE (t));
> > + remove = true;
> > + break;
> > +   }
> > +   }
> > + else
> > +   {
> > + if (!INTEGRAL_TYPE_P (type)
> > + && TREE_CODE (type) != POINTER_TYPE)
> > +   {
> > + error ("linear clause applied to non-integral non-pointer 
> > "
> > +"variable with %qT type", TREE_TYPE (t));
> > + remove = true;
> > + break;
> 
> And this can be done like I've hinted above.

OK, here is 81.

  -- Ilya


Re: [wwwdocs] Describe behavior of -flifetime-dse in class constructors

2016-02-17 Thread Martin Liška
On 02/17/2016 03:23 PM, Jakub Jelinek wrote:
> "has been" looks weird.  I'd say that the C++ compiler is now more
> aggressive...
> 
>   Jakub

Sending v3.

M.
Index: htdocs/gcc-6/porting_to.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/porting_to.html,v
retrieving revision 1.14
diff --unified -r1.14 porting_to.html
--- htdocs/gcc-6/porting_to.html	14 Feb 2016 13:13:43 -	1.14
+++ htdocs/gcc-6/porting_to.html	17 Feb 2016 15:00:35 -
@@ -324,6 +324,52 @@
 -fabi-version or -Wabi option to disable or warn about.
 
 
+More aggressive optimization of -flifetime-dse
+
+
+The C++ compiler (with enabled -flifetime-dse)
+is more aggressive in dead-store elimination in situations where
+a memory store to a location precedes a constructor to the
+memory location. Described situation can be commonly found in programs
+which zero a memory that is eventually passed to a placement new operator:
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+struct A
+{
+  A () {}
+  void *operator new (size_t s)
+  {
+void *ptr = malloc (s);
+memset (ptr, 0, s);
+return ptr;
+  }
+
+  int value;
+};
+
+A *
+__attribute__ ((noinline))
+build (void)
+{
+  return new A ();
+}
+
+int main()
+{
+  A *a =  build ();
+  assert (a->value == 0); /* Use of uninitialized value */
+  free (a);
+}
+
+
+If the program cannot be fixed to remove the undefined behavior then
+the option -fno-lifetime-dse can be used to disable
+this optimization.
+
 -Wmisleading-indentation
 
 A new warning -Wmisleading-indentation was added


[PATCH] Fix PR69568

2016-02-17 Thread Richard Biener

So after pondering a while the only thing we can do now is force
-fno-short-enums.

Installed.

Richard.

2016-02-17  Richard Biener  

PR testsuite/69586
* gcc.dg/uninit-21.c: Add -fno-short-enums.

Index: gcc/testsuite/gcc.dg/uninit-21.c
===
--- gcc/testsuite/gcc.dg/uninit-21.c(revision 233447)
+++ gcc/testsuite/gcc.dg/uninit-21.c(working copy)
@@ -1,6 +1,6 @@
 /* PR69537, spurious warning because of a missed optimization. */
 /* { dg-do compile } */
-/* { dg-options "-O2 -Wuninitialized" } */
+/* { dg-options "-O2 -fno-short-enums -Wuninitialized" } */
 
 enum clnt_stat {
  RPC_SUCCESS=0,


[patch] fix docs for C++ warn_unused type attribute

2016-02-17 Thread Jonathan Wakely

https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html says
that attribute((warn_unused)) is not useful for std::mutex because it
controls a resource. That's incorrect, std::mutex *is* a resource, but
it doesn't control one. In fact declaring std::mutex as a local
variable and then never using it almost certainly is a mistake,
because it won't be locked unless you use it.

A better example of a type that controls a resource would be
std::lock_guard, which you would typically construct and then never
refer to again (because everything interesting happens in the
constructor and destructor).

OK for trunk?


commit 01e62a231d9a446130fec253a2001f1e844e184c
Author: Jonathan Wakely 
Date:   Wed Feb 17 14:08:39 2016 +

	* doc/extend.texi (C++ Attributes): Correct description of
	warn_unused type attribute.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 78017fe..476d089 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -20313,7 +20313,7 @@ types.
 
 This attribute is appropriate for types which just represent a value,
 such as @code{std::string}; it is not appropriate for types which
-control a resource, such as @code{std::mutex}.
+control a resource, such as @code{std::lock_guard}.
 
 This attribute is also accepted in C, but it is unnecessary because C
 does not have constructors or destructors.


Re: [PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 05:32:58PM +0300, Ilya Verbin wrote:
> This patch fixes 
> Bootstrap and make check passed.  OK for... stage 1?

Ok for stage1, with a few nits.

> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -12527,7 +12527,8 @@ c_find_omp_placeholder_r (tree *tp, int *, void *data)
> Remove any elements from the list that are invalid.  */
>  
>  tree
> -c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd)
> +c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd,
> +   bool is_cilk)

Instead of passing 3 bools, it might be better to pass either a tree_code
or some enum or bitmask that would tell the code what are the clauses
used on.  But that can be done separately.
> +   && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))
> +   && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
> + {
> +   error_at (OMP_CLAUSE_LOCATION (c),
> + "linear clause applied to non-integral, "
> + "non-floating, non-pointer variable with type %qT",
> + TREE_TYPE (t));
> +   remove = true;
> +   break;
> + }
> + }
> +   else
> + {
> +   if (!INTEGRAL_TYPE_P (TREE_TYPE (t))
> +   && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
> + {
> +   error_at (OMP_CLAUSE_LOCATION (c),
> + "linear clause applied to non-integral non-pointer "

This line is too long.  But you could have just done
  else if (!INTEGRAL_TYPE_P (TREE_TYPE (t))
   && TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE)
{
instead and then it would fit.

> --- a/gcc/cp/semantics.c
> +++ b/gcc/cp/semantics.c
> @@ -5736,7 +5736,8 @@ cp_finish_omp_clause_depend_sink (tree sink_clause)
> Remove any elements from the list that are invalid.  */
>  
>  tree
> -finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd)
> +finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd,
> + bool is_cilk)

Similarly to the above note.

> +   error ("linear clause applied to non-integral, "
> +  "non-floating, non-pointer variable with %qT type",

Again too long line, that needs to be wrapped more.

> +  TREE_TYPE (t));
> +   remove = true;
> +   break;
> + }
> + }
> +   else
> + {
> +   if (!INTEGRAL_TYPE_P (type)
> +   && TREE_CODE (type) != POINTER_TYPE)
> + {
> +   error ("linear clause applied to non-integral non-pointer 
> "
> +  "variable with %qT type", TREE_TYPE (t));
> +   remove = true;
> +   break;

And this can be done like I've hinted above.

Jakub


[PATCH] Fix PR69854

2016-02-17 Thread Richard Biener

The following patch fixes PR69854 - match.pd shouldn't use 
fold_unary/binary and expect no or a constant result.  Instead using
the now available const_binop/unop is recommended.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2016-02-17  Richard Biener  

PR middle-end/69854
* match.pd: Don't use fold_binary or fold_unary for folding
constants.

* gcc.dg/torture/pr69854.c: New testcase.

Index: gcc/match.pd
===
--- gcc/match.pd(revision 233447)
+++ gcc/match.pd(working copy)
@@ -1063,7 +1063,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* If the constant operation overflows we cannot do the transform
as we would introduce undefined overflow, for example
with (a - 1) + INT_MIN.  */
- (with { tree cst = fold_binary (outer_op == inner_op
+ (with { tree cst = const_binop (outer_op == inner_op
 ? PLUS_EXPR : MINUS_EXPR, type, @1, @2); }
   (if (cst && !TREE_OVERFLOW (cst))
(inner_op @0 { cst; } ))
@@ -1072,7 +1072,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (for outer_op (plus minus)
(simplify
 (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2)
-(with { tree cst = fold_binary (outer_op, type, @1, @2); }
+(with { tree cst = const_binop (outer_op, type, @1, @2); }
  (if (cst && !TREE_OVERFLOW (cst))
   (minus { cst; } @0)
 
@@ -1270,7 +1270,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
RROTATE_EXPR by a new constant.  */
 (simplify
  (lrotate @0 INTEGER_CST@1)
- (rrotate @0 { fold_binary (MINUS_EXPR, TREE_TYPE (@1),
+ (rrotate @0 { const_binop (MINUS_EXPR, TREE_TYPE (@1),
build_int_cst (TREE_TYPE (@1),
   element_precision (type)), @1); }))
 
@@ -1596,7 +1596,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (plus @0 REAL_CST@1)
  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-  (with { tree tem = fold_unary (NEGATE_EXPR, type, @1); }
+  (with { tree tem = const_unop (NEGATE_EXPR, type, @1); }
(if (!TREE_OVERFLOW (tem) || !flag_trapping_math)
 (minus @0 { tem; })
 
@@ -2149,7 +2149,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (FLOAT_TYPE_P (TREE_TYPE (@0))
|| (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0
-   (with { tree tem = fold_unary (NEGATE_EXPR, TREE_TYPE (@0), @1); }
+   (with { tree tem = const_unop (NEGATE_EXPR, TREE_TYPE (@0), @1); }
 (if (tem && !TREE_OVERFLOW (tem))
  (scmp @0 { tem; }))
 
Index: gcc/testsuite/gcc.dg/torture/pr69854.c
===
--- gcc/testsuite/gcc.dg/torture/pr69854.c  (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr69854.c  (working copy)
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-frounding-math -ffast-math" } */
+
+double fn1()
+{
+  double w, s = fn1() - 6.12323399573676603587e17;
+  return 1.57079632679489655800e00 - (s + w);
+}


[PATCH][CilkPlus] Fix PR69363

2016-02-17 Thread Ilya Verbin
Hi!

This patch fixes PR c++/69363 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69363).
Bootstrap and make check passed.  OK for... stage 1?


gcc/c-family/
PR c++/69363
* c-cilkplus.c (c_finish_cilk_clauses): Remove function.
* c-common.h (c_finish_cilk_clauses): Remove declaration.
gcc/c/
PR c++/69363
* c-parser.c (c_parser_cilk_all_clauses): Use c_finish_omp_clauses
instead of c_finish_cilk_clauses.
* c-tree.h (c_finish_omp_clauses): Add new default argument.
* c-typeck.c (c_finish_omp_clauses): Add new argument.  Allow
floating-point variables in the linear clause for Cilk Plus.
gcc/cp/
PR c++/69363
* cp-tree.h (finish_omp_clauses): Add new default argument.
* parser.c (cp_parser_cilk_simd_all_clauses): Use finish_omp_clauses
instead of c_finish_cilk_clauses.
* semantics.c (finish_omp_clauses): Add new argument.  Allow
floating-point variables in the linear clause for Cilk Plus.
gcc/testsuite/
PR c++/69363
* c-c++-common/cilk-plus/PS/clauses3.c: Adjust dg-error string.
* c-c++-common/cilk-plus/PS/clauses4.c: New test.
* c-c++-common/cilk-plus/PS/pr69363.c: New test.


diff --git a/gcc/c-family/c-cilkplus.c b/gcc/c-family/c-cilkplus.c
index 3e7902fd..9f1f364 100644
--- a/gcc/c-family/c-cilkplus.c
+++ b/gcc/c-family/c-cilkplus.c
@@ -41,56 +41,6 @@ c_check_cilk_loop (location_t loc, tree decl)
   return true;
 }
 
-/* Validate and emit code for <#pragma simd> clauses.  */
-
-tree
-c_finish_cilk_clauses (tree clauses)
-{
-  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-{
-  tree prev = clauses;
-
-  /* If a variable appears in a linear clause it cannot appear in
-any other OMP clause.  */
-  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
-   for (tree c2 = clauses; c2; c2 = OMP_CLAUSE_CHAIN (c2))
- {
-   if (c == c2)
- continue;
-   enum omp_clause_code code = OMP_CLAUSE_CODE (c2);
-
-   switch (code)
- {
- case OMP_CLAUSE_LINEAR:
- case OMP_CLAUSE_PRIVATE:
- case OMP_CLAUSE_FIRSTPRIVATE:
- case OMP_CLAUSE_LASTPRIVATE:
- case OMP_CLAUSE_REDUCTION:
-   break;
-
- case OMP_CLAUSE_SAFELEN:
-   goto next;
-
- default:
-   gcc_unreachable ();
- }
-
-   if (OMP_CLAUSE_DECL (c) == OMP_CLAUSE_DECL (c2))
- {
-   error_at (OMP_CLAUSE_LOCATION (c2),
- "variable appears in more than one clause");
-   inform (OMP_CLAUSE_LOCATION (c),
-   "other clause defined here");
-   // Remove problematic clauses.
-   OMP_CLAUSE_CHAIN (prev) = OMP_CLAUSE_CHAIN (c2);
- }
- next:
-   prev = c2;
- }
-}
-  return clauses;
-}
-
 /* Calculate number of iterations of CILK_FOR.  */
 
 tree
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index fa3746c..663e457 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1369,7 +1369,6 @@ extern enum stv_conv scalar_to_vector (location_t loc, 
enum tree_code code,
   tree op0, tree op1, bool);
 
 /* In c-cilkplus.c  */
-extern tree c_finish_cilk_clauses (tree);
 extern tree c_validate_cilk_plus_loop (tree *, int *, void *);
 extern bool c_check_cilk_loop (location_t, tree);
 
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 7a27244..4770f45d 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -17427,7 +17427,7 @@ c_parser_cilk_all_clauses (c_parser *parser)
 
  saw_error:
   c_parser_skip_to_pragma_eol (parser);
-  return c_finish_cilk_clauses (clauses);
+  return c_finish_omp_clauses (clauses, false, false, true);
 }
 
 /* This function helps parse the grainsize pragma for a _Cilk_for statement.
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index 96ab049..8bfd256 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -661,7 +661,7 @@ extern tree c_begin_omp_task (void);
 extern tree c_finish_omp_task (location_t, tree, tree);
 extern void c_finish_omp_cancel (location_t, tree);
 extern void c_finish_omp_cancellation_point (location_t, tree);
-extern tree c_finish_omp_clauses (tree, bool, bool = false);
+extern tree c_finish_omp_clauses (tree, bool, bool = false, bool = false);
 extern tree c_build_va_arg (location_t, tree, location_t, tree);
 extern tree c_finish_transaction (location_t, tree, int);
 extern bool c_tree_equal (tree, tree);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 1122a88..d91bd72 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -12527,7 +12527,8 @@ c_find_omp_placeholder_r (tree *tp, int *, void *data)
Remove any elements from the list that are invalid.  */
 
 tree
-c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd)
+c_finish_omp_clauses (tree clauses, bool 

RFA: Prevent an ICE when redeclaring a static function as weak

2016-02-17 Thread Nick Clifton
Hi Guys,

  Redefining a previously defined static function as both public and
  weak triggers an ICE in ipa-visibility.c:

internal compiler error: in function_and_variable_visibility, at 
ipa-visibility.c:518

  This bug has been discussed and patch proposed here:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49899

  This submission is an updated version of that patch, made against
  the latest gcc sources.  It still chooses to generate an error message
  and disallow the conversion, which I hope is the correct action.

  OK to apply ?

Cheers
  Nick

gcc/ChangeLog
2016-02-17  Nick Clifton  

PR middle-end/49899
* varasm.c (merge_weak): Generate an error if an attempt is made
to convert a non-weak static function into a weak, public function.

gcc/testsuite/ChangeLog
2016-02-17  Nick Clifton  

PR middle-end/49899
* gcc.dg/pr49899.c: New test.

Index: gcc/varasm.c
===
--- gcc/varasm.c(revision 233486)
+++ gcc/varasm.c(working copy)
@@ -5366,6 +5366,11 @@
   gcc_assert (!TREE_USED (olddecl)
  || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (olddecl)));
 
+  /* PR 49899: You cannot convert a static function into a weak, public 
function.  */
+  if (! TREE_PUBLIC (olddecl) && TREE_PUBLIC (newdecl))
+   error ("weak declaration of %q+D being applied to a already "
+  "existing, static definition", newdecl);
+  
   if (TARGET_SUPPORTS_WEAK)
{
  /* We put the NEWDECL on the weak_decls list at some point.
--- /dev/null   2016-02-17 08:13:41.436963282 +
+++ gcc/testsuite/gcc.dg/pr49899.c  2016-02-17 14:12:41.066733255 +
@@ -0,0 +1,3 @@
+static int foo (void) { return 0; } /* { dg-error "weak declaration of 'foo' 
being applied to a already existing, static definition" } */
+int foo (void)  __attribute__((weak));
+


Re: [wwwdocs] Describe behavior of -flifetime-dse in class constructors

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 03:21:07PM +0100, Martin Liška wrote:
> --- htdocs/gcc-6/porting_to.html  14 Feb 2016 13:13:43 -  1.14
> +++ htdocs/gcc-6/porting_to.html  17 Feb 2016 14:20:13 -
> @@ -324,6 +324,52 @@
>  -fabi-version or -Wabi option to disable or warn about.
>  
>  
> +More aggressive optimization of -flifetime-dse
> +
> +
> +The C++ compiler (with enabled -flifetime-dse)
> +has been more aggressive in dead-store elimination in situations where

"has been" looks weird.  I'd say that the C++ compiler is now more
aggressive...

Jakub


Re: [wwwdocs] Describe behavior of -flifetime-dse in class constructors

2016-02-17 Thread Martin Liška
On 02/16/2016 05:55 PM, Martin Sebor wrote:
> I think the new text deserves a new heading of its own rather than
> being added under the existing "Stricter flexible array member rules."
> (The "Finally..." part changed by the patch still applies to the
> flexible array members.)
> 
> Martin

Hi Martin.

Thanks for the nit, fixed in v2.

Ready to be installed?
Martin
Index: htdocs/gcc-6/porting_to.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/porting_to.html,v
retrieving revision 1.14
diff --unified -r1.14 porting_to.html
--- htdocs/gcc-6/porting_to.html	14 Feb 2016 13:13:43 -	1.14
+++ htdocs/gcc-6/porting_to.html	17 Feb 2016 14:20:13 -
@@ -324,6 +324,52 @@
 -fabi-version or -Wabi option to disable or warn about.
 
 
+More aggressive optimization of -flifetime-dse
+
+
+The C++ compiler (with enabled -flifetime-dse)
+has been more aggressive in dead-store elimination in situations where
+a memory store to a location precedes a constructor to the
+memory location. Described situation can be commonly found in programs
+which zero a memory that is eventually passed to a placement new operator:
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+struct A
+{
+  A () {}
+  void *operator new (size_t s)
+  {
+void *ptr = malloc (s);
+memset (ptr, 0, s);
+return ptr;
+  }
+
+  int value;
+};
+
+A *
+__attribute__ ((noinline))
+build (void)
+{
+  return new A ();
+}
+
+int main()
+{
+  A *a =  build ();
+  assert (a->value == 0); /* Use of uninitialized value */
+  free (a);
+}
+
+
+If the program cannot be fixed to remove the undefined behavior then
+the option -fno-lifetime-dse can be used to disable
+this optimization.
+
 -Wmisleading-indentation
 
 A new warning -Wmisleading-indentation was added


Re: [RFC] [P2] [PR tree-optimization/33562] Lowering more complex assignments.

2016-02-17 Thread Richard Biener
On Wed, Feb 17, 2016 at 3:02 PM, Jeff Law  wrote:
> On 02/17/2016 03:48 AM, Richard Biener wrote:
>
>>> I instrumented a bootstrap -- the improved DSE finds ~20k additional DSE
>>> opportunities during a GCC bootstrap that could not be found by the
>>> current
>>> DSE.  Yes, 20k additional statements deleted by tree DSE.  Yow!
>>
>>
>> Well, DCE also can do quite some DSE and it runs after DSE - did that 20k
>> more DSE affect the overall end-result?
>
> I haven't looked at that yet.  I just got the instrumentation data last
> night.
>
>
>>> Of those additional opportunities > 99% are for sizes of 64 bytes or
>>> smaller.  Thus we can pack those into 1 or 2 bitmap elements, depending
>>> on
>>> the starting offset.  So the bitmap side will be efficient with no real
>>> searching if we choose our PARAM value wisely.
>>
>>
>> So then please use a uint64_t or even uint32_t mask please.  Which means
>> a fixed size SBITMAP (32 bits) if you like to use the bitmap interface.
>
> I actually prefer the standard bitmap interface as it seamlessly handles
> differences in the starting offset for the writes.
>
>>
>> Can you share your work-in-progress patch?
>
> Easy 'nuff.  This will bootstrap and regression test.  Was planning to spend
> today generating some additional testcodes from new cases it catches and
> looking at impacts on code generation.
>
> I'm particularly interested in any impact on the zero-sized object clobbers.
> I'd like to remove the bits which filter those out.
>
> It feels like there's some refactoring that ought to happen in this code.
> Both in terms of the mostly duplicated test that a particular ref is
> "interesting" and with mostly duplicated code to extract a ref from a mem*
> or assignment.
>
> jeff
>
>
> commit d49afd895524df98c5e53280b1c77f4b61a45ba3
> Author: Jeff Law 
> Date:   Tue Feb 16 13:44:20 2016 -0500
>
> Checkpoint
>
> CHeckpoint
>
> Another checkpoint
>
> Checkpoint
>
> diff --git a/gcc/params.def b/gcc/params.def
> index c0494fa..5aa146b 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -520,6 +520,11 @@ DEFPARAM(PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND,
>  "If number of candidates in the set is smaller, we always try to
> remove unused ivs during its optimization.",
>  10, 0, 0)
>
> +DEFPARAM(PARAM_DSE_MAX_OBJECT_SIZE,
> +"dse-max-object-size",
> +"Maximum size (in bytes) of objects tracked by dead store
> elimination.",
> +64, 0, 0)
> +
>  DEFPARAM(PARAM_SCEV_MAX_EXPR_SIZE,
>  "scev-max-expr-size",
>  "Bound on size of expressions used in the scalar evolutions
> analyzer.",
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
> b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
> index 87a2638..3155741 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
> @@ -10,4 +10,4 @@ int f(void)
>return g();
>  }
>
> -/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail
> *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
> b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
> index e2cd403..e6d027f 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
> @@ -8,4 +8,4 @@ int f(void)
>   __imag__ t = 2;
>  }
>
> -/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail
> *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
> index 594c20c..ae48ddd 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
> @@ -11,4 +11,4 @@ foo ()
>  }
>
>  /* We should eliminate the first assignment.  */
> -/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" } } */
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 372a0be..97a091b 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-dfa.h"
>  #include "domwalk.h"
>  #include "tree-cfgcleanup.h"
> +#include "params.h"
>
>  /* This file implements dead store elimination.
>
> @@ -68,6 +69,58 @@ along with GCC; see the file COPYING3.  If not see
> remove their dead edges eventually.  */
>  static bitmap need_eh_cleanup;
>
> +/* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
> +   address written by STMT must match the one found in REF, which must
> +   have its base address previously initialized.
> +
> +   This routine must be conservative.  If we don't know the offset or
> +   actual size written, assume nothing was written.  */
> +
> +static void
> +clear_bytes_written_by (bitmap live_bytes, gimple 

Re: [RFC] [P2] [PR tree-optimization/33562] Lowering more complex assignments.

2016-02-17 Thread Jeff Law

On 02/17/2016 03:48 AM, Richard Biener wrote:


I instrumented a bootstrap -- the improved DSE finds ~20k additional DSE
opportunities during a GCC bootstrap that could not be found by the current
DSE.  Yes, 20k additional statements deleted by tree DSE.  Yow!


Well, DCE also can do quite some DSE and it runs after DSE - did that 20k
more DSE affect the overall end-result?
I haven't looked at that yet.  I just got the instrumentation data last 
night.




Of those additional opportunities > 99% are for sizes of 64 bytes or
smaller.  Thus we can pack those into 1 or 2 bitmap elements, depending on
the starting offset.  So the bitmap side will be efficient with no real
searching if we choose our PARAM value wisely.


So then please use a uint64_t or even uint32_t mask please.  Which means
a fixed size SBITMAP (32 bits) if you like to use the bitmap interface.
I actually prefer the standard bitmap interface as it seamlessly handles 
differences in the starting offset for the writes.




Can you share your work-in-progress patch?
Easy 'nuff.  This will bootstrap and regression test.  Was planning to 
spend today generating some additional testcodes from new cases it 
catches and looking at impacts on code generation.


I'm particularly interested in any impact on the zero-sized object 
clobbers.  I'd like to remove the bits which filter those out.


It feels like there's some refactoring that ought to happen in this 
code.  Both in terms of the mostly duplicated test that a particular ref 
is "interesting" and with mostly duplicated code to extract a ref from a 
mem* or assignment.


jeff

commit d49afd895524df98c5e53280b1c77f4b61a45ba3
Author: Jeff Law 
Date:   Tue Feb 16 13:44:20 2016 -0500

Checkpoint

CHeckpoint

Another checkpoint

Checkpoint

diff --git a/gcc/params.def b/gcc/params.def
index c0494fa..5aa146b 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -520,6 +520,11 @@ DEFPARAM(PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND,
 "If number of candidates in the set is smaller, we always try to 
remove unused ivs during its optimization.",
 10, 0, 0)
 
+DEFPARAM(PARAM_DSE_MAX_OBJECT_SIZE,
+"dse-max-object-size",
+"Maximum size (in bytes) of objects tracked by dead store 
elimination.",
+64, 0, 0)
+
 DEFPARAM(PARAM_SCEV_MAX_EXPR_SIZE,
 "scev-max-expr-size",
 "Bound on size of expressions used in the scalar evolutions analyzer.",
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c 
b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
index 87a2638..3155741 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-4.c
@@ -10,4 +10,4 @@ int f(void)
   return g();
 }
 
-/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail *-*-* 
} } } */
+/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c 
b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
index e2cd403..e6d027f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/complex-5.c
@@ -8,4 +8,4 @@ int f(void)
  __imag__ t = 2;
 }
 
-/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" { xfail *-*-* 
} } } */
+/* { dg-final { scan-tree-dump-times "__complex__" 0 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
index 594c20c..ae48ddd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-9.c
@@ -11,4 +11,4 @@ foo ()
 }
 
 /* We should eliminate the first assignment.  */
-/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VDEF" 2 "dse1" } } */
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 372a0be..97a091b 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-dfa.h"
 #include "domwalk.h"
 #include "tree-cfgcleanup.h"
+#include "params.h"
 
 /* This file implements dead store elimination.
 
@@ -68,6 +69,58 @@ along with GCC; see the file COPYING3.  If not see
remove their dead edges eventually.  */
 static bitmap need_eh_cleanup;
 
+/* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
+   address written by STMT must match the one found in REF, which must
+   have its base address previously initialized.
+
+   This routine must be conservative.  If we don't know the offset or
+   actual size written, assume nothing was written.  */
+
+static void
+clear_bytes_written_by (bitmap live_bytes, gimple *stmt, ao_ref *ref)
+{
+  ao_ref write;
+  write.base = NULL;
+
+  /* It's advantageous to handle certain mem* functions.  */
+  if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+{
+  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
+   {
+ case BUILT_IN_MEMCPY:
+ 

Re: [PATCH] Add debug_function_graph_to_file

2016-02-17 Thread Richard Biener
On Wed, Feb 17, 2016 at 2:51 PM, Marek Polacek  wrote:
> On Wed, Feb 17, 2016 at 02:45:36PM +0100, Richard Biener wrote:
>> OTOH I have in my local trees a more convenient form (attached).
>>
>> (gdb) call debug_dot_cfg (cfun, 1<<6)
>>
>> and a X window with the dotted graph opens.
>
> Is there any chance we could get this into the mainline?  I'd love to use
> this without having to patch my tree locally.  If I remember correctly
> there were some issues with unportable popen()?

I think I never posted it for inclusion.  The ??? would also need investigation,
probably using a temporary file is easier (though would require manual
cleaning).

What does it take to write it in python instead?

Richard.

> Marek


Re: [PATCH] Add a new option "-fmerge-bitfields" (patch / doc inside)

2016-02-17 Thread Richard Biener
On Wed, 17 Feb 2016, Richard Biener wrote:

> On Wed, 17 Feb 2016, Bernd Schmidt wrote:
> 
> > 
> > 
> > On 02/17/2016 02:18 PM, Daniel Gutson wrote:
> > > On Wed, Nov 26, 2014 at 5:46 AM, Andrew Pinski  wrote:
> > 
> > > > FYI. This causes gfc_add_interface_mapping in fortrant/trans-expr.c to
> > > > be miscompiled for aarch64-linux-gnu.  I am still debugging it and
> > > > trying to get a smaller testcase.
> > > 
> > > Hello,
> > > 
> > > is there any update on this?
> > 
> > Is this a PR somewhere?
> 
> I think there are several.  But rather than a special pass I hope
> we can get to lowering all bitfield accesses somewhere and make
> our regular passes deal with the combining.  I've had multiple
> approaches at this but never went through finalizing them
> (tried doing that too early all the times I guess).

Whee.  2011 - https://gcc.gnu.org/ml/gcc-patches/2011-06/msg01233.html.
I remember updating this for DECL_BIT_FIELD_REPRESENTATIVE we have now,
simplifying this.  I also remember doing the lowering (using
DECL_BIT_FIELD_REPRESENTATIVE) at gimplification time.  And then
bitfield lowering was part of the original mem-ref branch (that didn't
get merged).

Richard.


Re: [PATCH] Add debug_function_graph_to_file

2016-02-17 Thread Marek Polacek
On Wed, Feb 17, 2016 at 02:45:36PM +0100, Richard Biener wrote:
> OTOH I have in my local trees a more convenient form (attached).
> 
> (gdb) call debug_dot_cfg (cfun, 1<<6)
> 
> and a X window with the dotted graph opens.

Is there any chance we could get this into the mainline?  I'd love to use
this without having to patch my tree locally.  If I remember correctly
there were some issues with unportable popen()?

Marek


Re: [PATCH] Add a new option "-fmerge-bitfields" (patch / doc inside)

2016-02-17 Thread Richard Biener
On Wed, 17 Feb 2016, Bernd Schmidt wrote:

> 
> 
> On 02/17/2016 02:18 PM, Daniel Gutson wrote:
> > On Wed, Nov 26, 2014 at 5:46 AM, Andrew Pinski  wrote:
> 
> > > FYI. This causes gfc_add_interface_mapping in fortrant/trans-expr.c to
> > > be miscompiled for aarch64-linux-gnu.  I am still debugging it and
> > > trying to get a smaller testcase.
> > 
> > Hello,
> > 
> > is there any update on this?
> 
> Is this a PR somewhere?

I think there are several.  But rather than a special pass I hope
we can get to lowering all bitfield accesses somewhere and make
our regular passes deal with the combining.  I've had multiple
approaches at this but never went through finalizing them
(tried doing that too early all the times I guess).

Richard.


Re: [PATCH] Add a new option "-fmerge-bitfields" (patch / doc inside)

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 02:45:16PM +0100, Bernd Schmidt wrote:
> 
> 
> On 02/17/2016 02:18 PM, Daniel Gutson wrote:
> >On Wed, Nov 26, 2014 at 5:46 AM, Andrew Pinski  wrote:
> 
> >>FYI. This causes gfc_add_interface_mapping in fortrant/trans-expr.c to
> >>be miscompiled for aarch64-linux-gnu.  I am still debugging it and
> >>trying to get a smaller testcase.
> >
> >Hello,
> >
> >is there any update on this?
> 
> Is this a PR somewhere?

Perhaps related to PR22141?

Jakub


Re: [PATCH] Add a new option "-fmerge-bitfields" (patch / doc inside)

2016-02-17 Thread Bernd Schmidt



On 02/17/2016 02:18 PM, Daniel Gutson wrote:

On Wed, Nov 26, 2014 at 5:46 AM, Andrew Pinski  wrote:



FYI. This causes gfc_add_interface_mapping in fortran/trans-expr.c to
be miscompiled for aarch64-linux-gnu.  I am still debugging it and
trying to get a smaller testcase.


Hello,

is there any update on this?


Is this a PR somewhere?


Bernd


Re: [PATCH] Add debug_function_graph_to_file

2016-02-17 Thread Richard Biener
On Wed, Feb 17, 2016 at 2:04 PM, Tom de Vries  wrote:
> Hi,
>
> once in a while I'm in a gdb debug session debugging cc1, and want to print
> the current function graph to file (see also
> https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01160.html for the non-graph
> variant).
>
> That is currently possible by doing:
> ...
> (gdb) call clean_graph_dump_file ("foo.1")
> (gdb) call print_graph_cfg ("foo.1", cfun)
> (gdb) call finish_graph_dump_file ("foo.1")
> ...
> which will generate a file "foo.1.dot".
>
> But I think a short-hand is easier.
>
> This patch adds a function debug_function_graph_to_file. It can f.i. be
> called as:
> ...
> (gdb) call debug_function_graph_to_file (cfun.decl, "foo.1.dot")
> ...
>
> [ I'll post a follow-up WIP patch that adds the flags parameter. ]
>
> OK for stage1 trunk if bootstrap and reg-test succeeds?

See other mail - python?

OTOH I have in my local trees a more convenient form (attached).

(gdb) call debug_dot_cfg (cfun, 1<<6)

and a X window with the dotted graph opens.

I'm using this a lot.  I always wondered what it takes to do this in
gdbhooks.py and thus look forward to somebody else trying something similar there.

Richard.

> Thanks,
> - Tom
>
>


p
Description: Binary data


Re: [PATCH] Add debug_function_to_file

2016-02-17 Thread Richard Biener
On Wed, Feb 17, 2016 at 1:41 PM, Tom de Vries  wrote:
> Hi,
>
> once in a while I'm in a gdb debug session debugging cc1, and want to print
> the current function to file.
>
> There's a debug function debug_function that prints a function to stderr,
> and there are methods to redirect output of a command to a file (
> https://sourceware.org/gdb/onlinedocs/gdb/Logging-Output.html ).
>
> And there's a function dump_function_to_file that takes a FILE* parameter,
> which could be combined with open/close calls in gdb.
>
> But I think a short-hand is easier.
>
> This patch adds a function debug_function_to_file. It can f.i. be called as:
> ...
> (gdb) call debug_function_to_file (cfun.decl, "foo.1.txt", 0)
> ...
>
> Hmm, now I wonder if the order 'cfun.decl, 0, "foo.1.txt"' would make more
> sense (first two parameters the same as in debug_function).
>
> OK for stage1 trunk if bootstrap and reg-test succeeds?

Bonus for making this a helper in gdbhooks.py instead, using
fopen/fclose and the existing inferior calls.

Richard.

> Thanks,
> - Tom


[wwwdocs] Fix broken links on /readings.html

2016-02-17 Thread Jonathan Wakely

The links to http://cm.bell-labs.com/cm/cs/who/dmr/ no longer work;
these appear to be the right ones now.

Committed to CVS.

Index: htdocs/readings.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/readings.html,v
retrieving revision 1.242
diff -u -r1.242 readings.html
--- htdocs/readings.html	14 Nov 2015 23:40:21 -	1.242
+++ htdocs/readings.html	17 Feb 2016 13:38:11 -
@@ -360,11 +360,11 @@
 
   
 
-http://cm.bell-labs.com/cm/cs/who/dmr/chist.html;>The
+https://www.bell-labs.com/usr/dmr/www/chist.html;>The
 Development of the C Language, by Dennis M. Ritchie (also in
-http://cm.bell-labs.com/cm/cs/who/dmr/chist.ps;>PostScript
-and http://cm.bell-labs.com/cm/cs/who/dmr/chist.pdf;>PDF).
-An early http://cm.bell-labs.com/cm/cs/who/dmr/cman.ps;>C
+https://www.bell-labs.com/usr/dmr/www/chist.ps;>PostScript
+and https://www.bell-labs.com/usr/dmr/www/chist.pdf;>PDF).
+An early https://www.bell-labs.com/usr/dmr/www/cman.ps;>C
 Reference Manual.
 
   


Re: [PATCH][ARM] PR target/69161: Don't ignore mode when matching comparison operator in cstore-like patterns

2016-02-17 Thread Kyrill Tkachov

Hi Nick,

On 17/02/16 13:13, Nick Clifton wrote:

Hi Kyrill,


Ok for trunk?

2016-01-29  Kyrylo Tkachov  

 PR target/69161
 * config/arm/predicates.md (arm_comparison_operator_mode):
 New predicate.
 * config/arm/arm.md (*mov_scc): Use arm_comparison_operator_mode
 instead of arm_comparison_operator.
 (*mov_negscc): Likewise.
 (*mov_notscc): Likewise.
 * config/arm/thumb2.md (*thumb2_mov_scc): Likewise.
 (*thumb2_mov_negscc): Likewise.
 (*thumb2_mov_negscc_strict_it): Likewise.
 (*thumb2_mov_notscc): Likewise.
 (*thumb2_mov_notscc_strict_it): Likewise.

Approved - please apply - but ...



Thanks!


diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 
c66c31d5c6047aa7decfe7e95d111d5fbf6fb52e..b8f09ab6b109f80abe2df08a8b7f954f521ec1bf
 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -341,6 +341,11 @@ (define_special_predicate "arm_comparison_operator"
(and (match_operand 0 "expandable_comparison_operator")
 (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
  
+;; Likewise, but don't ignore the mode.

+(define_predicate "arm_comparison_operator_mode"

Please could you extend the comment here to reference the PR.  That way
anyone reading this code who wonders why we need to have two versions of
the same predicate will be able to understand what is happening.


Ok, here's what I committed with r233495.

Kyrill


Cheers
   Nick



commit 59380f7f3e34f4c4e17a610e67341a0de0272c15
Author: Kyrylo Tkachov 
Date:   Wed Jan 13 13:29:36 2016 +

[ARM] PR target/69161: Don't ignore mode when matching comparison operator in cstore patterns

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5129e85..15b4a4a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -7190,7 +7190,7 @@ (define_expand "cstore_cc"
 
 (define_insn_and_split "*mov_scc"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "arm_comparison_operator_mode"
 	 [(match_operand 2 "cc_register" "") (const_int 0)]))]
   "TARGET_ARM"
   "#"   ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
@@ -7207,7 +7207,7 @@ (define_insn_and_split "*mov_scc"
 
 (define_insn_and_split "*mov_negscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(neg:SI (match_operator:SI 1 "arm_comparison_operator"
+	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
   "TARGET_ARM"
   "#"   ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index c66c31d..f696458 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -341,6 +341,13 @@ (define_special_predicate "arm_comparison_operator"
   (and (match_operand 0 "expandable_comparison_operator")
(match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
 
+;; Likewise, but don't ignore the mode.
+;; RTL SET operations require their operands source and destination have
+;; the same modes, so we can't ignore the modes there.  See PR target/69161.
+(define_predicate "arm_comparison_operator_mode"
+  (and (match_operand 0 "expandable_comparison_operator")
+   (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
+
 (define_special_predicate "lt_ge_comparison_operator"
   (match_code "lt,ge"))
 
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 39a3d80..9925365 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -370,7 +370,7 @@ (define_insn "*thumb2_cmpsi_neg_shiftsi"
 
 (define_insn_and_split "*thumb2_mov_scc"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "arm_comparison_operator_mode"
 	 [(match_operand 2 "cc_register" "") (const_int 0)]))]
   "TARGET_THUMB2"
   "#"   ; "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
@@ -388,7 +388,7 @@ (define_insn_and_split "*thumb2_mov_scc"
 
 (define_insn_and_split "*thumb2_mov_negscc"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(neg:SI (match_operator:SI 1 "arm_comparison_operator"
+	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
   "TARGET_THUMB2 && !arm_restrict_it"
   "#"   ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
@@ -407,7 +407,7 @@ (define_insn_and_split "*thumb2_mov_negscc"
 
 (define_insn_and_split "*thumb2_mov_negscc_strict_it"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
-	(neg:SI (match_operator:SI 1 "arm_comparison_operator"
+	(neg:SI (match_operator:SI 1 "arm_comparison_operator_mode"
 		 [(match_operand 2 "cc_register" "") (const_int 0)])))]
   "TARGET_THUMB2 && arm_restrict_it"
   "#"   ; ";mvn\\t%0, #0 ;it\\t%D1\;mov%D1\\t%0, #0\"
@@ -436,7 +436,7 @@ (define_insn_and_split "*thumb2_mov_negscc_strict_it"
 
 

Re: [PATCH] Add a new option "-fmerge-bitfields" (patch / doc inside)

2016-02-17 Thread Daniel Gutson
On Wed, Nov 26, 2014 at 5:46 AM, Andrew Pinski  wrote:
> On Wed, Oct 29, 2014 at 6:34 AM, Zoran Jovanovic
>  wrote:
>> Hello,
>> This is new patch version in which reported issue is fixed.
>> Also, patch is rebased to the revision 216452 and some minor code clean-up 
>> is done.
>
> FYI. This causes gfc_add_interface_mapping in fortran/trans-expr.c to
> be miscompiled for aarch64-linux-gnu.  I am still debugging it and
> trying to get a smaller testcase.

Hello,

   is there any update on this?

Thanks,

   Daniel.


>
> Thanks,
> Andrew
>
>>
>> --
>>
>> Lowering is applied only for bit-fields copy sequences that are merged.
>> Data structure representing bit-field copy sequences is renamed and reduced 
>> in size.
>> Optimization turned on by default for -O2 and higher.
>> Some comments fixed.
>>
>> Benchmarking performed on WebKit for Android.
>> Code size reduction noticed on several files, best examples are:
>>
>> core/rendering/style/StyleMultiColData (632->520 bytes)
>> core/platform/graphics/FontDescription (1715->1475 bytes)
>> core/rendering/style/FillLayer (5069->4513 bytes)
>> core/rendering/style/StyleRareInheritedData (5618->5346)
>> core/css/CSSSelectorList(4047->3887)
>> core/platform/animation/CSSAnimationData (3844->3440 bytes)
>> core/css/resolver/FontBuilder (13818->13350 bytes)
>> core/platform/graphics/Font (16447->15975 bytes)
>>
>>
>> Example:
>>
>> One of the motivating examples for this work was copy constructor of the 
>> class which contains bit-fields.
>>
>> C++ code:
>> class A
>> {
>> public:
>> A(const A );
>> unsigned a : 1;
>> unsigned b : 2;
>> unsigned c : 4;
>> };
>>
>> A::A(const A)
>> {
>> a = x.a;
>> b = x.b;
>> c = x.c;
>> }
>>
>> GIMPLE code without optimization:
>>
>>   :
>>   _3 = x_2(D)->a;
>>   this_4(D)->a = _3;
>>   _6 = x_2(D)->b;
>>   this_4(D)->b = _6;
>>   _8 = x_2(D)->c;
>>   this_4(D)->c = _8;
>>   return;
>>
>> Optimized GIMPLE code:
>>   :
>>   _10 = x_2(D)->D.1867;
>>   _11 = BIT_FIELD_REF <_10, 7, 0>;
>>   _12 = this_4(D)->D.1867;
>>   _13 = _12 & 128;
>>   _14 = (unsigned char) _11;
>>   _15 = _13 | _14;
>>   this_4(D)->D.1867 = _15;
>>   return;
>>
>> Generated MIPS32r2 assembly code without optimization:
>>  lw  $3,0($5)
>> lbu $2,0($4)
>> andi$3,$3,0x1
>> andi$2,$2,0xfe
>> or  $2,$2,$3
>> sb  $2,0($4)
>> lw  $3,0($5)
>> andi$2,$2,0xf9
>> andi$3,$3,0x6
>> or  $2,$2,$3
>> sb  $2,0($4)
>> lw  $3,0($5)
>> andi$2,$2,0x87
>> andi$3,$3,0x78
>> or  $2,$2,$3
>> j   $31
>> sb  $2,0($4)
>>
>> Optimized MIPS32r2 assembly code:
>> lw  $3,0($5)
>> lbu $2,0($4)
>> andi$3,$3,0x7f
>> andi$2,$2,0x80
>> or  $2,$3,$2
>> j   $31
>> sb  $2,0($4)
>>
>>
>> Algorithm works on basic block level and consists of following 3 major steps:
>> 1. Go through basic block statements list. If there are statement pairs that 
>> implement copy of bit field content from one memory location to another 
>> record statements pointers and other necessary data in corresponding data 
>> structure.
>> 2. Identify records that represent adjacent bit field accesses and mark them 
>> as merged.
>> 3. Lower bit-field accesses by using new field size for those that can be 
>> merged.
>>
>>
>> New command line option "-fmerge-bitfields" is introduced.
>>
>>
>> Tested - passed gcc regression tests for MIPS32r2.
>>
>>
>> Changelog -
>>
>> gcc/ChangeLog:
>> 2014-04-22 Zoran Jovanovic (zoran.jovano...@imgtec.com)
>>   * common.opt (fmerge-bitfields): New option.
>>   * doc/invoke.texi: Add reference to "-fmerge-bitfields".
>>   * doc/invoke.texi: Add "-fmerge-bitfields" to the list of optimization
>> flags turned on at -O2.
>>   * tree-sra.c (lower_bitfields): New function.
>>   Entry for (-fmerge-bitfields).
>>   (part_of_union_p): New function.
>>   (bf_access_candidate_p): New function.
>>   (lower_bitfield_read): New function.
>>   (lower_bitfield_write): New function.
>>   (bitfield_stmt_bfcopy_pair::hash): New function.
>>   (bitfield_stmt_bfcopy_pair::equal): New function.
>>   (bitfield_stmt_bfcopy_pair::remove): New function.
>>   (create_and_insert_bfcopy): New function.
>>   (get_bit_offset): New function.
>>   (add_stmt_bfcopy_pair): New function.
>>   (cmp_bfcopies): New function.
>>   (get_merged_bit_field_size): New function.
>>   * dwarf2out.c (simple_type_size_in_bits): Move to tree.c.
>>   (field_byte_offset): Move declaration to tree.h and make it extern.
>>   * testsuite/gcc.dg/tree-ssa/bitfldmrg1.c: New test.
>>   * testsuite/gcc.dg/tree-ssa/bitfldmrg2.c: New test.
>>   * 

Re: [Patch, fortran, pr67451, v1] [5/6 Regression] ICE with sourced allocation from coarray

2016-02-17 Thread Paul Richard Thomas
Dear Andre,

I had left this to somebody else, since I am travelling!

The patch is verging on 'obvious' and so it is OK for trunk.

Could you check the line terminators please? I am seeing CR-LFs but
this might be an effect of transmission.

Thanks for the patch.

Paul

On 10 February 2016 at 12:26, Andre Vehreschild  wrote:
> Hi all,
>
> unfortunately my last patch for pr67451 was not perfect and introduced
> regressions occurring on s390(x) and with the sanitizer. These were
> caused because, when taking the array specs from the source=-expression,
> its attributes, like coarray state and so on, were also taken from
> there. This additionally added a corank to local objects to allocate
> that were not coarrays, overwriting data in the array handle. The attached
> patch fixes both issues.
>
> The patch for gcc-5 is not affected, because in gcc-5 the feature of
> taking the array spec from the source=-expression is not implemented.
>
> Bootstrapped and regtested ok on x86_64-linux-gnu/F23.
>
> Ok for trunk?
>
> Regards,
> Andre
>
> On Tue, 2 Feb 2016 19:24:46 +0100
> Paul Richard Thomas  wrote:
>
>> Hi Andre,
>>
>> This looks to be OK for trunk.
>>
>> I'll move to the 5-branch patch right away.
>>
>> Thanks
>>
>> Paul
>>
>> On 29 January 2016 at 19:17, Andre Vehreschild  wrote:
>> > Hi all,
>> >
>> > attached is a patch to fix a regression in current gfortran when a
>> > coarray is used in the source=-expression of an allocate(). The ICE was
>> > caused by the class information, i.e., _vptr and so on, not at the
>> > expected place. The patch fixes this.
>> >
>> > The patch also fixes pr69418, which I will flag as a duplicate in a
>> > second.
>> >
>> > Bootstrapped and regtested ok on x86_64-linux-gnu/F23.
>> >
>> > Ok for trunk?
>> >
>> > Backport to gcc-5 is pending, albeit more difficult, because the
>> > allocate() implementation on 5 is not as advanced the one in 6.
>> >
>> > Regards,
>> > Andre
>> > --
>> > Andre Vehreschild * Email: vehre ad gmx dot de
>>
>>
>>
>
>
> --
> Andre Vehreschild * Email: vehre ad gmx dot de



-- 
The difference between genius and stupidity is; genius has its limits.

Albert Einstein


Re: [PATCH][ARM] PR target/69161: Don't ignore mode when matching comparison operator in cstore-like patterns

2016-02-17 Thread Nick Clifton
Hi Kyrill,

> Ok for trunk?
> 
> 2016-01-29  Kyrylo Tkachov  
> 
> PR target/69161
> * config/arm/predicates.md (arm_comparison_operator_mode):
> New predicate.
> * config/arm/arm.md (*mov_scc): Use arm_comparison_operator_mode
> instead of arm_comparison_operator.
> (*mov_negscc): Likewise.
> (*mov_notscc): Likewise.
> * config/arm/thumb2.md (*thumb2_mov_scc): Likewise.
> (*thumb2_mov_negscc): Likewise.
> (*thumb2_mov_negscc_strict_it): Likewise.
> (*thumb2_mov_notscc): Likewise.
> (*thumb2_mov_notscc_strict_it): Likewise.

Approved - please apply - but ...

> diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
> index 
> c66c31d5c6047aa7decfe7e95d111d5fbf6fb52e..b8f09ab6b109f80abe2df08a8b7f954f521ec1bf
>  100644
> --- a/gcc/config/arm/predicates.md
> +++ b/gcc/config/arm/predicates.md
> @@ -341,6 +341,11 @@ (define_special_predicate "arm_comparison_operator"
>(and (match_operand 0 "expandable_comparison_operator")
> (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
>  
> +;; Likewise, but don't ignore the mode.
> +(define_predicate "arm_comparison_operator_mode"

Please could you extend the comment here to reference the PR.  That way
anyone reading this code who wonders why we need to have two versions of
the same predicate will be able to understand what is happening.

Cheers
  Nick


[PATCH] Add debug_function_graph_to_file

2016-02-17 Thread Tom de Vries

Hi,

once in a while I'm in a gdb debug session debugging cc1, and want to 
print the current function graph to file (see also 
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01160.html for the 
non-graph variant).


That is currently possible by doing:
...
(gdb) call clean_graph_dump_file ("foo.1")
(gdb) call print_graph_cfg ("foo.1", cfun)
(gdb) call finish_graph_dump_file ("foo.1")
...
which will generate a file "foo.1.dot".

But I think a short-hand is easier.

This patch adds a function debug_function_graph_to_file. It can f.i. be 
called as:

...
(gdb) call debug_function_graph_to_file (cfun.decl, "foo.1.dot")
...

[ I'll post a follow-up WIP patch that adds the flags parameter. ]

OK for stage1 trunk if bootstrap and reg-test succeeds?

Thanks,
- Tom


Add debug_function_graph_to_file

2016-02-17  Tom de Vries  

	* graph.c (print_graph_cfg_fp): New function, factor out of ...
	(print_graph_cfg): ... here.
	(debug_function_graph_to_file): New debug function.
	* graph.h (debug_function_graph_to_file): Declare.

---
 gcc/graph.c | 37 +
 gcc/graph.h |  1 +
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/gcc/graph.c b/gcc/graph.c
index 1b28c67..6a06c03 100644
--- a/gcc/graph.c
+++ b/gcc/graph.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfganal.h"
 #include "cfgloop.h"
 #include "graph.h"
+#include "tree.h"
 
 /* DOT files with the .dot extension are recognized as document templates
by a well-known piece of word processing software out of Redmond, WA.
@@ -267,16 +268,15 @@ draw_cfg_edges (pretty_printer *pp, struct function *fun)
   pp_flush (pp);
 }
 
-/* Print a graphical representation of the CFG of function FUN.
+/* Print a graphical representation of the CFG of function FUN to file FP.
First print all basic blocks.  Draw all edges at the end to get
subgraphs right for GraphViz, which requires nodes to be defined
before edges to cluster nodes properly.  */
 
-void
-print_graph_cfg (const char *base, struct function *fun)
+static void
+print_graph_cfg_fp (FILE *fp, struct function *fun)
 {
   const char *funcname = function_name (fun);
-  FILE *fp = open_graph_file (base, "a");
   pretty_printer graph_slim_pp;
   graph_slim_pp.buffer->stream = fp;
   pretty_printer *const pp = _slim_pp;
@@ -289,6 +289,16 @@ print_graph_cfg (const char *base, struct function *fun)
   draw_cfg_edges (pp, fun);
   pp_printf (pp, "}\n");
   pp_flush (pp);
+}
+
+/* Print a graphical representation of the CFG of function FUN to a file
+   BASE.dot.  */
+
+void
+print_graph_cfg (const char *base, struct function *fun)
+{
+  FILE *fp = open_graph_file (base, "a");
+  print_graph_cfg_fp (fp, fun);
   fclose (fp);
 }
 
@@ -333,3 +343,22 @@ finish_graph_dump_file (const char *base)
   end_graph_dump (fp);
   fclose (fp);
 }
+
+/* Dump FUNCTION_DECL FN to FILENAME.  */
+
+DEBUG_FUNCTION void
+debug_function_graph_to_file (tree fn, const char *filename)
+{
+  FILE *fp = fopen (filename, "w");
+  if (fp == NULL)
+{
+  fprintf (stderr, "can't open %s for writing", filename);
+  return;
+}
+
+  start_graph_dump (fp, filename);
+  print_graph_cfg_fp (fp, DECL_STRUCT_FUNCTION (fn));
+  end_graph_dump (fp);
+
+  fclose (fp);
+}
diff --git a/gcc/graph.h b/gcc/graph.h
index fadd7c5..a770f77 100644
--- a/gcc/graph.h
+++ b/gcc/graph.h
@@ -23,5 +23,6 @@ along with GCC; see the file COPYING3.  If not see
 extern void print_graph_cfg (const char *, struct function *);
 extern void clean_graph_dump_file (const char *);
 extern void finish_graph_dump_file (const char *);
+extern void debug_function_graph_to_file (tree, const char *);
 
 #endif /* ! GCC_GRAPH_H */


[PATCH, ARM] Fix redefinition of cpp macros with #pragma GCC pop,reset

2016-02-17 Thread Christian Bruel
target_option_current_node, used in c-pragma.c to check if a state
should be popped or reset to the previous value, was not set when
switching state with #pragma GCC target (I failed to notice that, since it
is done for pop,reset). So in some cases the state might not be reset
correctly.

This patch sets it for #pragma GCC target paths and updates the comments
as well to clarify this point.

As a benefit we now use this cached value instead of
build_target_option_node (&global_options); this should speed up (a
little bit) this path when processing arm_neon.h.

One effect of it is that some predicate tests (e.g. arm_neonv2_ok) in the
testsuite were returning the wrong value, thus marking some tests as
UNRESOLVED instead of PASS. See the reduced case of the issue attached
in the patch.

Regtested, a few new PASS for -mfpu=neon-fp-armv8








2016-02-17  Christian Bruel  

	* config/arm/arm.c (arm_option_override): Initialize
	target_option_current_node.
	* config/arm/arm-c.c (arm_pragma_target_parse): Replace
	build_target_option_node call by target_option_current_node.
	Set target_option_current_node.	Fix comments.
	
2016-02-17  Christian Bruel  

	* gcc.target/arm/pragma_cpp_fma.c: New test.

Index: gcc/config/arm/arm-c.c
===
--- gcc/config/arm/arm-c.c	(revision 233489)
+++ gcc/config/arm/arm-c.c	(working copy)
@@ -199,7 +199,7 @@ arm_cpu_cpp_builtins (struct cpp_reader * pfile)
 static bool
 arm_pragma_target_parse (tree args, tree pop_target)
 {
-  tree prev_tree = build_target_option_node (_options);
+  tree prev_tree = target_option_current_node;
   tree cur_tree;
   struct cl_target_option *prev_opt;
   struct cl_target_option *cur_opt;
@@ -220,9 +220,14 @@ arm_pragma_target_parse (tree args, tree pop_targe
 TREE_TARGET_OPTION (prev_tree));
 	  return false;
 	}
+
+  /* handle_pragma_pop_options and handle_pragma_reset_options will set
+   target_option_current_node, but not handle_pragma_target.  */
+  target_option_current_node = cur_tree;
 }
 
-  /* Figure out the previous mode.  */
+  /* Update macros if target_node changes. The global state will be restored
+ by arm_set_current_function.  */
   prev_opt  = TREE_TARGET_OPTION (prev_tree);
   cur_opt   = TREE_TARGET_OPTION (cur_tree);
 
Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c	(revision 233489)
+++ gcc/config/arm/arm.c	(working copy)
@@ -3446,7 +3446,8 @@ arm_option_override (void)
 
   /* Save the initial options in case the user does function specific
  options or #pragma target.  */
-  target_option_default_node = build_target_option_node (_options);
+  target_option_default_node = target_option_current_node
+  = build_target_option_node (_options);
 
   /* Init initial mode for testing.  */
   thumb_flipper = TARGET_THUMB;
Index: gcc/testsuite/gcc.target/arm/pragma_cpp_fma.c
===
--- gcc/testsuite/gcc.target/arm/pragma_cpp_fma.c	(revision 0)
+++ gcc/testsuite/gcc.target/arm/pragma_cpp_fma.c	(working copy)
@@ -0,0 +1,36 @@
+/* Test that FMA macro is correctly undefined.  */
+/* { dg-do compile } */
+/* { dg-skip-if "Default no fma" { *-*-* } { "-mfpu=*vfpv4*" "-mfpu=*armv8"} } */
+/* { dg-require-effective-target arm_fp_ok } */
+/* { dg-add-options arm_fp } */
+
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+
+#ifndef __ARM_FEATURE_FMA
+#error "__ARM_FEATURE_FMA is not defined but should be"
+#endif
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error "__ARM_FEATURE_CRYPTO is not defined but should be"
+#endif
+
+#if __ARM_NEON_FP != 6
+#error "__ARM_NEON_FP"
+#endif
+
+#if __ARM_FP != 14
+#error "__ARM_FP"
+#endif
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-vfpv4")
+#pragma GCC pop_options
+
+#ifdef __ARM_FEATURE_FMA
+#error "__ARM_FEATURE_FMA is defined but should not be"
+#endif
+
+


[PATCH] Add debug_function_to_file

2016-02-17 Thread Tom de Vries

Hi,

once in a while I'm in a gdb debug session debugging cc1, and want to 
print the current function to file.


There's a debug function debug_function that prints a function to 
stderr, and there are methods to redirect output of a command to a file 
( https://sourceware.org/gdb/onlinedocs/gdb/Logging-Output.html ).


And there's a function dump_function_to_file that takes a FILE* 
parameter, which could be combined with open/close calls in gdb.


But I think a short-hand is easier.

This patch adds a function debug_function_to_file. It can f.i. be called as:
...
(gdb) call debug_function_to_file (cfun.decl, "foo.1.txt", 0)
...

Hmm, now I wonder if the order 'cfun.decl, 0, "foo.1.txt"' would make 
more sense (first two parameters the same as in debug_function).


OK for stage1 trunk if bootstrap and reg-test succeeds?

Thanks,
- Tom
Add debug_function_to_file

2016-02-17  Tom de Vries  

	* tree-cfg.c (debug_function_to_file): New debug function.
	* tree-cfg.h (debug_function_to_file): Declare.

---
 gcc/tree-cfg.c | 14 ++
 gcc/tree-cfg.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index b54545d..8b6ae86 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -7600,6 +7600,20 @@ debug_function (tree fn, int flags)
   dump_function_to_file (fn, stderr, flags);
 }
 
+/* Dump FUNCTION_DECL FN to FILENAME using FLAGS (see TDF_* in tree.h).  */
+
+DEBUG_FUNCTION void
+debug_function_to_file (tree fn, const char *filename, int flags)
+{
+  FILE *fp = fopen (filename, "w");
+  if (fp == NULL)
+{
+  fprintf (stderr, "can%'t open %s for writing: %m", filename);
+  return;
+}
+  dump_function_to_file (fn, fp, flags);
+  fclose (fp);
+}
 
 /* Print on FILE the indexes for the predecessors of basic_block BB.  */
 
diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
index 802e292..6c573e4 100644
--- a/gcc/tree-cfg.h
+++ b/gcc/tree-cfg.h
@@ -80,6 +80,7 @@ extern basic_block move_sese_region_to_fn (struct function *, basic_block,
    basic_block, tree);
 extern void dump_function_to_file (tree, FILE *, int);
 extern void debug_function (tree, int) ;
+extern void debug_function_to_file (tree, const char *, int);
 extern void print_loops_bb (FILE *, basic_block, int, int);
 extern void print_loops (FILE *, int);
 extern void debug (struct loop &ref);


Re: RFC: [Patch, PR Bug 60818] - ICE in validate_condition_mode on powerpc*-linux-gnu* ]

2016-02-17 Thread Segher Boessenkool
Hi Rohit,

[ Please CC: me on combine patches ].

On Tue, Feb 16, 2016 at 05:02:30AM +, Rohit Arul Raj D wrote:
> This is related to the following bug:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60818
> 
> Test case:
> unsigned int ou;
> int jv(void)
> {
>   unsigned int rg;
>   return rg < ou;
> }

> a) The test case passes with '-mno-isel' option.
>  The bug description has 2 test cases (comment #4) and both of them pass 
> with this option.

It needs -m32 -misel -mno-mfcrf to fail.

> Corresponding content of "op" which causes the ICE:
> gdb) p debug_rtx (op)
> (gtu:SI (reg:CC 166)  -- (operator and mode doesn't match)
> (const_int 0 [0]))

That is invalid RTL for this target (should be CCUNS).  Invalid RTL
should not be passed to recog.

> My initial fix was to have signed_scc_comparison_operator and 
> unsigned_scc_comparison_operator but that led to ICE from another stage of 
> combiner pass. So I thought it would be better to fix this at the stage where 
> the conditional mode is being wrongly set.

Yes.  Btw, please try to send email with lines < 76 chars or so.

> (gdb) p debug_rtx (other_insn)
> (insn 11 10 16 2 (set (reg:SI 165 [ D.2339+-3 ])
> (if_then_else:SI (ne (reg:CC 166)
> (const_int 0 [0]))
> (reg:SI 168)
> (reg:SI 167))) test.c:7 317 {isel_unsigned_si}
>  (expr_list:REG_DEAD (reg:SI 168)
> (expr_list:REG_DEAD (reg:SI 167)
> (expr_list:REG_DEAD (reg:CC 166)
> (expr_list:REG_EQUAL (gtu:SI (reg:CC 166)
> (const_int 0 [0]))
> (nil))

The REG_EQUAL there is bad already.  Where does that come from?

> +   else
> +  {
> +SUBST_MODE (regno_reg_rtx[regno], new_mode);
> +new_dest = regno_reg_rtx[regno];
> +  }
> +SUBST (SET_DEST (i2mod_new_rhs), new_dest);
> + }

Let's not SUBST outside of try_combine please.  It probably works, but
there is enough action-at-a-distance as it is.


Segher


Re: [PATCH, PR69607] Mark offload symbols as global in lto

2016-02-17 Thread Jakub Jelinek
On Wed, Feb 17, 2016 at 01:02:17PM +0100, Tom de Vries wrote:
> Mark offload symbols as global in lto

I'm really not familiar with that part of LTO, so I'm CCing Honza and
Richard here.

> 2016-02-08  Tom de Vries  
> 
>   PR lto/69607
>   * lto-partition.c (promote_offload_tables): New function.
>   * lto-partition.h (promote_offload_tables):  Declare.

Just one space instead of two after :

>   * lto.c (do_whole_program_analysis): call promote_offload_tables.

Capital C in Call.

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-36.c
> @@ -0,0 +1,4 @@
> +/* { dg-do run { target lto } } */
> +/* { dg-additional-options "-flto -flto-partition=1to1 
> -fno-toplevel-reorder" } */
> +
> +#include "target-1.c"
> diff --git a/libgomp/testsuite/libgomp.c/target-37.c 
> b/libgomp/testsuite/libgomp.c/target-37.c
> new file mode 100644
> index 000..1edb21e
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-37.c
> @@ -0,0 +1,98 @@
> +/* { dg-do run { target lto } } */
> +/* { dg-additional-sources "target-38.c" } */
> +/* { dg-additional-options "-flto -flto-partition=1to1 
> -fno-toplevel-reorder" } */
> +
> +extern
> +#ifdef __cplusplus
> +"C"
> +#endif
> +void abort (void);

Why the C++ stuff in there?  Do you intend to include the testcase
also in libgomp.c++?  If not, it is not needed.
Otherwise, the tests LGTM.

Jakub


[PATCH] Fix PR69609, BB reorder slowness

2016-02-17 Thread Richard Biener

This fixes BB reorder speed for the testcase (lots of computed gotos
in a very large function).  BB reorder time goes down from

 reorder blocks  : 260.53 (80%) usr   0.37 (27%) sys 261.80 (80%) 
wall  432597 kB (58%) ggc

to

 reorder blocks  :   1.03 ( 2%) usr   0.08 ( 7%) sys   1.08 ( 2%) 
wall  432597 kB (58%) ggc

with this patch which effectively removes a quadraticness in
the number of incoming/outgoing edges.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.  I verified
code generation is not affected on a set of cc1 files.

Richard.

2016-02-17  Richard Biener  

PR rtl-optimization/69609
* bb-reorder.c (struct bbro_basic_block_data): Add priority member.
(find_traces_1_round): When ending a trace update cached priority
of successors.
(bb_to_key): Use cached priority when available.
(copy_bb): Initialize cached priority.
(reorder_basic_blocks_software_trace_cache): Likewise.

Index: gcc/bb-reorder.c
===
*** gcc/bb-reorder.c(revision 233452)
--- gcc/bb-reorder.c(working copy)
*** struct bbro_basic_block_data
*** 157,162 
--- 157,166 
/* Which trace was this bb visited in?  */
int visited;
  
+   /* Cached maximum frequency of interesting incoming edges.
+  Minus one means not yet computed.  */
+   int priority;
+ 
/* Which heap is BB in (if any)?  */
bb_heap_t *heap;
  
*** find_traces_1_round (int branch_th, int
*** 775,781 
while (best_edge);
trace->last = bb;
bbd[trace->first->index].start_of_trace = *n_traces - 1;
!   bbd[trace->last->index].end_of_trace = *n_traces - 1;
  
/* The trace is terminated so we have to recount the keys in heap
 (some block can have a lower key because now one of its predecessors
--- 779,793 
while (best_edge);
trace->last = bb;
bbd[trace->first->index].start_of_trace = *n_traces - 1;
!   if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
!   {
! bbd[trace->last->index].end_of_trace = *n_traces - 1;
! /* Update the cached maximum frequency for interesting predecessor
!edges for successors of the new trace end.  */
! FOR_EACH_EDGE (e, ei, trace->last->succs)
!   if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
! bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
!   }
  
/* The trace is terminated so we have to recount the keys in heap
 (some block can have a lower key because now one of its predecessors
*** copy_bb (basic_block old_bb, edge e, bas
*** 845,850 
--- 857,863 
  bbd[i].end_of_trace = -1;
  bbd[i].in_trace = -1;
  bbd[i].visited = 0;
+ bbd[i].priority = -1;
  bbd[i].heap = NULL;
  bbd[i].node = NULL;
}
*** bb_to_key (basic_block bb)
*** 875,881 
  {
edge e;
edge_iterator ei;
-   int priority = 0;
  
/* Use index as key to align with its original order.  */
if (optimize_function_for_size_p (cfun))
--- 888,893 
*** bb_to_key (basic_block bb)
*** 889,905 
  
/* Prefer blocks whose predecessor is an end of some trace
   or whose predecessor edge is EDGE_DFS_BACK.  */
!   FOR_EACH_EDGE (e, ei, bb->preds)
  {
!   if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
!  && bbd[e->src->index].end_of_trace >= 0)
! || (e->flags & EDGE_DFS_BACK))
{
! int edge_freq = EDGE_FREQUENCY (e);
  
! if (edge_freq > priority)
!   priority = edge_freq;
}
  }
  
if (priority)
--- 901,923 
  
/* Prefer blocks whose predecessor is an end of some trace
   or whose predecessor edge is EDGE_DFS_BACK.  */
!   int priority = bbd[bb->index].priority;
!   if (priority == -1)
  {
!   priority = 0;
!   FOR_EACH_EDGE (e, ei, bb->preds)
{
! if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
!  && bbd[e->src->index].end_of_trace >= 0)
! || (e->flags & EDGE_DFS_BACK))
!   {
! int edge_freq = EDGE_FREQUENCY (e);
  
! if (edge_freq > priority)
!   priority = edge_freq;
!   }
}
+   bbd[bb->index].priority = priority;
  }
  
if (priority)
*** reorder_basic_blocks_software_trace_cach
*** 2253,2258 
--- 2271,2277 
bbd[i].end_of_trace = -1;
bbd[i].in_trace = -1;
bbd[i].visited = 0;
+   bbd[i].priority = -1;
bbd[i].heap = NULL;
bbd[i].node = NULL;
  }


Re: [PATCH, PR69607] Mark offload symbols as global in lto

2016-02-17 Thread Tom de Vries

On 08/02/16 14:00, Tom de Vries wrote:

Hi,

when running libgomp.c testsuite with "-flto -flto-partition=1to1
-fno-toplevel-reorder" we run into many compilation failures like this:
...
/tmp/.ltrans0.ltrans.o:(.gnu.offload_funcs+0x1a0): undefined
reference to `MAIN__._omp_fn.0'^M
...

The problem is that the offload table is in one lto partition, and the
function listed in the offload table is in another, without the function
having been promoted to be visible in the other partition.

The patch fixes this by promoting the symbols in the offload table such
that they're visible in all partitions.

Bootstrapped and reg-tested on x86_64.

Built for nvidia accelerator and reg-tested libgomp with various lto
settings.



Added multi-source testcase target-3{7,8}.c that triggers the PR for 
intelmicemul accelerator.


OK for trunk, stage1 (or stage4, if that's appropriate)?

Thanks,
- Tom

Mark offload symbols as global in lto

2016-02-08  Tom de Vries  

	PR lto/69607
	* lto-partition.c (promote_offload_tables): New function.
	* lto-partition.h (promote_offload_tables):  Declare.
	* lto.c (do_whole_program_analysis): call promote_offload_tables.

	* testsuite/libgomp.c/target-36.c: New test.
	* testsuite/libgomp.c/target-37.c: New test.
	* testsuite/libgomp.c/target-38.c: New test.

---
 gcc/lto/lto-partition.c | 28 ++
 gcc/lto/lto-partition.h |  1 +
 gcc/lto/lto.c   |  2 +
 libgomp/testsuite/libgomp.c/target-36.c |  4 ++
 libgomp/testsuite/libgomp.c/target-37.c | 98 +
 libgomp/testsuite/libgomp.c/target-38.c | 95 
 6 files changed, 228 insertions(+)

diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..56598d4 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "ipa-inline.h"
 #include "lto-partition.h"
+#include "omp-low.h"
 
 vec<ltrans_partition> ltrans_partitions;
 
@@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
 	"Promoting as hidden: %s\n", node->name ());
 }
 
+/* Promote the symbols in the offload tables.  */
+
+void
+promote_offload_tables (void)
+{
+  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+return;
+
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+{
+  tree fn_decl = (*offload_funcs)[i];
+  cgraph_node *node = cgraph_node::get (fn_decl);
+  if (node->externally_visible)
+	continue;
+  promote_symbol (node);
+}
+
+  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+{
+  tree var_decl = (*offload_vars)[i];
+  varpool_node *node = varpool_node::get (var_decl);
+  if (node->externally_visible)
+	continue;
+  promote_symbol (node);
+}
+}
+
 /* Return true if NODE needs named section even if it won't land in the partition
symbol table.
FIXME: we should really not use named sections for inline clones and master
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..1a38126 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -36,6 +36,7 @@ extern vec ltrans_partitions;
 void lto_1_to_1_map (void);
 void lto_max_map (void);
 void lto_balanced_map (int);
+extern void promote_offload_tables (void);
 void lto_promote_cross_file_statics (void);
 void free_ltrans_partitions (void);
 void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..2736c5c 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
  to globals with hidden visibility because they are accessed from multiple
  partitions.  */
   lto_promote_cross_file_statics ();
+  /* Promote all the offload symbols.  */
+  promote_offload_tables ();
   timevar_pop (TV_WHOPR_PARTITIONING);
 
   timevar_stop (TV_PHASE_OPT_GEN);
diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
new file mode 100644
index 000..bafb718
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-36.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+#include "target-1.c"
diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
new file mode 100644
index 000..1edb21e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-37.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-sources "target-38.c" } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+fn1 (double *x, double *y, int z)
+{
+  int i;
+  for (i = 0; i < z; i++)
+{
+  x[i] = i & 31;
+  y[i] = (i & 63) - 30;
+}
+}
+
+#pragma omp declare target
+static int 

[COMMITTED][AArch64] Add missing return in aarch64_internal_mov_immediate

2016-02-17 Thread Wilco Dijkstra
As part of the final cleanup of aarch64_internal_mov_immediate a return was
accidentally removed.  This causes the 2-instruction case to fall through
into the general case even when it found a match.  An example immediate is
0xcccd which was using 2 instructions in GCC5 but now requires 4.
Adding the return fixes the regressions.

Committed as trivial in revision 233490.

2016-02-17  Wilco Dijkstra  

gcc/
* config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
Add missing return.
--

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
a2fc3d05158e126f91e8274e9d5329f511d09530..9be5fbbd9716d9cf9568acee01996ff97ab5a654
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1632,6 +1632,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool 
generate,
  emit_insn (gen_insv_immdi (dest, GEN_INT (i),
 GEN_INT ((val >> i) & 0xffff)));
}
+ return 2;
}
 }



[testsuite] Skip gcc.dg/ifcvt-4.c on Visium

2016-02-17 Thread Eric Botcazou
Same reason as for ARM.

Tested on visium-elf, applied on the mainline.


2016-02-17  Eric Botcazou  

* gcc.dg/ifcvt-4.c: Skip on Visium.

-- 
Eric Botcazou
Index: gcc.dg/ifcvt-4.c
===
--- gcc.dg/ifcvt-4.c	(revision 233448)
+++ gcc.dg/ifcvt-4.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3" } */
-/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* powerpc64le*-*-*" } {"*"} { "" } }  */
+/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* powerpc64le*-*-* visium-*-*" } {"*"} { "" } }  */
 
 int
 foo (int x, int y, int a)


[visium] Tidy up support of machine-specific libfuncs

2016-02-17 Thread Eric Botcazou
This adds an internal table instead of regenerating naked symbols each time 
the libfuncs are used.

Tested on visium-elf, applied on the mainline.


2016-02-17  Eric Botcazou  

* config/visium/visium.c (machine_libfunc_index): New enum.
(machine_libfuncs): New structure.
(visium_libfuncs): New static variable.
(TARGET_INIT_LIBFUNCS): Define to...
(visium_init_libfuncs): ...this.  New function.
(expand_block_move_4): Use the appropriate libfunc.
(expand_block_move_2): Likewise.
(expand_block_move_1): Likewise.
(expand_block_set_4): Likewise.
(expand_block_set_2): Likewise.
(expand_block_set_1): Likewise.
(visium_trampoline_init): Likewise.

-- 
Eric Botcazou
Index: config/visium/visium.c
===
--- config/visium/visium.c	(revision 233448)
+++ config/visium/visium.c	(working copy)
@@ -57,6 +57,41 @@
 /* This file should be included last.  */
 #include "target-def.h"
 
+/* Enumeration of indexes into machine_libfunc_table.  */
+enum machine_libfunc_index
+{
+  MLTI_long_int_memcpy,
+  MLTI_wrd_memcpy,
+  MLTI_byt_memcpy,
+
+  MLTI_long_int_memset,
+  MLTI_wrd_memset,
+  MLTI_byt_memset,
+
+  MLTI_set_trampoline_parity,
+
+  MLTI_MAX
+};
+
+struct GTY(()) machine_libfuncs
+{
+  rtx table[MLTI_MAX];
+};
+
+/* The table of Visium-specific libfuncs.  */
+static GTY(()) struct machine_libfuncs visium_libfuncs;
+
+#define vlt visium_libfuncs.table
+
+/* Accessor macros for visium_libfuncs.  */
+#define long_int_memcpy_libfunc		(vlt[MLTI_long_int_memcpy])
+#define wrd_memcpy_libfunc		(vlt[MLTI_wrd_memcpy])
+#define byt_memcpy_libfunc		(vlt[MLTI_byt_memcpy])
+#define long_int_memset_libfunc		(vlt[MLTI_long_int_memset])
+#define wrd_memset_libfunc		(vlt[MLTI_wrd_memset])
+#define byt_memset_libfunc		(vlt[MLTI_byt_memset])
+#define set_trampoline_parity_libfunc	(vlt[MLTI_set_trampoline_parity])
+
 /* Machine specific function data. */
 struct GTY (()) machine_function
 {
@@ -187,6 +222,8 @@ static bool visium_rtx_costs (rtx, machi
 
 static void visium_option_override (void);
 
+static void visium_init_libfuncs (void);
+
 static unsigned int visium_reorg (void);
 
 /* Setup the global target hooks structure.  */
@@ -282,6 +319,9 @@ static unsigned int visium_reorg (void);
 #undef  TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE visium_option_override
 
+#undef  TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS visium_init_libfuncs
+
 #undef  TARGET_CONDITIONAL_REGISTER_USAGE
 #define TARGET_CONDITIONAL_REGISTER_USAGE visium_conditional_register_usage
 
@@ -422,6 +462,23 @@ visium_option_override (void)
   register_pass (&insert_pass_visium_reorg);
 }
 
+/* Register the Visium-specific libfuncs with the middle-end.  */
+
+static void
+visium_init_libfuncs (void)
+{
+  if (!TARGET_BMI)
+long_int_memcpy_libfunc = init_one_libfunc ("__long_int_memcpy");
+  wrd_memcpy_libfunc = init_one_libfunc ("__wrd_memcpy");
+  byt_memcpy_libfunc = init_one_libfunc ("__byt_memcpy");
+
+  long_int_memset_libfunc = init_one_libfunc ("__long_int_memset");
+  wrd_memset_libfunc = init_one_libfunc ("__wrd_memset");
+  byt_memset_libfunc = init_one_libfunc ("__byt_memset");
+
+  set_trampoline_parity_libfunc = init_one_libfunc ("__set_trampoline_parity");
+}
+
 /* Return the number of instructions that can issue on the same cycle.  */
 
 static int
@@ -2226,7 +2283,6 @@ visium_split_cstore (enum rtx_code op_co
 static void
 expand_block_move_4 (rtx dst, rtx dst_reg, rtx src, rtx src_reg, rtx bytes_rtx)
 {
-  const rtx sym = gen_rtx_SYMBOL_REF (Pmode, "__long_int_memcpy");
   unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
   unsigned int rem = bytes % 4;
 
@@ -2250,8 +2306,9 @@ expand_block_move_4 (rtx dst, rtx dst_re
   emit_insn (insn);
 }
   else
-emit_library_call (sym, LCT_NORMAL, VOIDmode, 3, dst_reg, Pmode, src_reg,
-		   Pmode,
+emit_library_call (long_int_memcpy_libfunc, LCT_NORMAL, VOIDmode, 3,
+		   dst_reg, Pmode,
+		   src_reg, Pmode,
 		   convert_to_mode (TYPE_MODE (sizetype),
 	GEN_INT (bytes >> 2),
 TYPE_UNSIGNED (sizetype)),
@@ -2282,12 +2339,12 @@ expand_block_move_4 (rtx dst, rtx dst_re
 static void
 expand_block_move_2 (rtx dst, rtx dst_reg, rtx src, rtx src_reg, rtx bytes_rtx)
 {
-  const rtx sym = gen_rtx_SYMBOL_REF (Pmode, "__wrd_memcpy");
   unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
   unsigned int rem = bytes % 2;
 
-  emit_library_call (sym, LCT_NORMAL, VOIDmode, 3, dst_reg, Pmode, src_reg,
-		 Pmode,
+  emit_library_call (wrd_memcpy_libfunc, LCT_NORMAL, VOIDmode, 3,
+		 dst_reg, Pmode,
+		 src_reg, Pmode,
 		 convert_to_mode (TYPE_MODE (sizetype),
   GEN_INT (bytes >> 1),
   TYPE_UNSIGNED (sizetype)),
@@ -2309,9 +2366,8 @@ expand_block_move_2 (rtx dst, rtx dst_re
 static void
 expand_block_move_1 (rtx dst_reg, rtx src_reg, rtx bytes_rtx)
 

Re: [RFC] [P2] [PR tree-optimization/33562] Lowering more complex assignments.

2016-02-17 Thread Richard Biener
On Wed, Feb 17, 2016 at 8:30 AM, Jeff Law  wrote:
> On 02/14/2016 11:38 AM, Richard Biener wrote:
>>
>> On February 14, 2016 5:35:13 PM GMT+01:00, Jeff Law 
>> wrote:
>>>
>>> On 02/12/2016 10:21 AM, Jeff Law wrote:
>
> But really we simply need a better DSE algorithm.

 So the way to fix DSE is to keep the existing algorithm and track the
 hunks of Complex/aggregates that have been set a second time.

 My first thought was to implement this as an inverted bitmap.  ie,
>>>
>>> set

 it to 1 for every byte in the complex/aggregate that is set by the
>>>
>>> first

 store.  Clear bits for each byte in subsequent stores to the pieces.
>>>
>>> If

 the bitmap reaches an empty state, then the initial store is dead.

 Adjusting *ref could work too, but would probably be painful if the
 subsequent stores don't happen in a convenient order.
>>>
>>> So that was scary easy.  We should have done this a long time ago.
>>>
>>> Essentially I call ao_get_ref_base to get the offset/size/max_size
>>> filled in for the first statement.  Those are used to initialize the
>>> live bytes bitfield, as long as max_size != -1.
>>>
>>> Then when we have a possible killing statement, we use the ao in a
>>> similar manner to determine which bytes to clear (taking care that the
>>> base is the same between the two references and that in the killing
>>> statement that the size/max_size are the same.).
>>>
>>> When all the live bytes are zero then we've killed the original
>>> statement.
>>>
>>> It's ~20 lines of code.
>>>
>>> I need to pull together some additional tests, but it looks likely
>>> we'll
>>> be able to wrap this up easily for gcc-6.
>>
>>
>> BTW, we had sth like this before but it had both correctness and more
>> importantly scalability issues.
>
> Just a couple more tidbits.
>
> I instrumented a bootstrap -- the improved DSE finds ~20k additional DSE
> opportunities during a GCC bootstrap that could not be found by the current
> DSE.  Yes, 20k additional statements deleted by tree DSE.  Yow!

Well, DCE also can do quite some DSE and it runs after DSE - did that 20k
more DSE affect the overall end-result?

> Of those additional opportunities, none require bit level tracking.  So we
> can just punt when the size/offset is not byte sized/aligned.

Yep.  I expect us to eventually lower all those bit-precision stuff.

> Of those additional opportunities > 99% are for sizes of 64 bytes or
> smaller.  Thus we can pack those into 1 or 2 bitmap elements, depending on
> the starting offset.  So the bitmap side will be efficient with no real
> searching if we choose our PARAM value wisely.

So then please use a uint64_t or even uint32_t mask please.  Which means
a fixed size SBITMAP (32 bits) if you like to use the bitmap interface.

> The outliers are, well, strange.  There were cases where we found new DSE
> opportunities for objects of size 2k bytes or larger.  There weren't many of
> these, but I was surprised at the size.  Most likely it's a clobber or mem*
> thing that's participating in DSE.  But I haven't looked closely at those
> cases yet.

I suspect it's memset followed by actually initializing all elements.  We have
quite some of those I think.

> There's a ton of statements that are clobbering zero-sized objects.  My code
> can determine when those clobbers are redundant (with some later clobber),
> but I haven't looked closely to see if that's actually a good thing to do or
> not.
>
> Anyway, I still don't see anything which makes me think this can't wrap-up
> in the immediate future.

Can you share your work-in-progress patch?

Thanks,
Richard.

> jeff


Re: [PING] genattrtab.c generate switch

2016-02-17 Thread Jesper Broge Jørgensen


On 19/01/16 10:44, Richard Biener wrote:

On Mon, Jan 18, 2016 at 7:48 PM, Jeff Law  wrote:

On 01/18/2016 07:09 AM, Jesper Broge Jørgensen wrote:

Ping patch:

https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00784.html

I'd put it in my gcc-7 queue.  But if Richard, Bernd, Richi or someone else
wants to work through the changes as a bugfix for bootstrapping on platforms
with crippled compilers, I won't object.

I'd take it as a bugfix but the patch still needs review.

Richard.


jeff
I have finally received a confirmation from the FSF that they received my 
copyright assignment, so I am now ready to have the patch reviewed.


Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2016-02-17 Thread Kyrill Tkachov


On 17/02/16 10:20, Christophe Lyon wrote:

On 17 February 2016 at 11:05, Kyrill Tkachov
 wrote:

On 17/02/16 10:03, Christophe Lyon wrote:

On 15 February 2016 at 12:32, Kyrill Tkachov
 wrote:

On 04/02/16 08:58, Ramana Radhakrishnan wrote:

On Tue, Jun 30, 2015 at 2:15 AM, Jim Wilson 
wrote:

This is my suggested fix for PR 65932, which is a linux kernel
miscompile with gcc-5.1.

The problem here is caused by a chain of events.  The first is that
the relatively new eipa_sra pass creates fake parameters that behave
slightly differently than normal parameters.  The second is that the
optimizer creates phi nodes that copy local variables to fake
parameters and/or vice versa.  The third is that the out-of-ssa pass
assumes that it can emit simple move instructions for these phi nodes.
And the fourth is that the ARM port has a PROMOTE_MODE macro that
forces QImode and HImode to unsigned, but a
TARGET_PROMOTE_FUNCTION_MODE hook that does not.  So signed char and
short parameters have different in register representations than local
variables, and require a conversion when copying between them, a
conversion that the out-of-ssa pass can't easily emit.

Ultimately, I think this is a problem in the arm backend.  It should
not have a PROMOTE_MODE macro that is changing the sign of char and
short local variables.  I also think that we should merge the
PROMOTE_MODE macro with the TARGET_PROMOTE_FUNCTION_MODE hook to
prevent this from happening again.

I see four general problems with the current ARM PROMOTE_MODE
definition.
1) Unsigned char is only faster for armv5 and earlier, before the sxtb
instruction was added.  It is a lose for armv6 and later.
2) Unsigned short was only faster for targets that don't support
unaligned accesses.  Support for these targets was removed a while
ago, and this PROMOTE_MODE hunk should have been removed at the same
time.  It was accidentally left behind.
3) TARGET_PROMOTE_FUNCTION_MODE used to be a boolean hook, when it was
converted to a function, the PROMOTE_MODE code was copied without the
UNSIGNEDP changes.  Thus it is only an accident that
TARGET_PROMOTE_FUNCTION_MODE and PROMOTE_MODE disagree.  Changing
TARGET_PROMOTE_FUNCTION_MODE is an ABI change, so only PROMOTE_MODE
changes to resolve the difference are safe.
4) There is a general principle that you should only change signedness
in PROMOTE_MODE if the hardware forces it, as otherwise this results
in extra conversion instructions that make code slower.  The mips64
hardware for instance requires that 32-bit values be sign-extended
regardless of type, and instructions may trap if this is not true.
However, it has a set of 32-bit instructions that operate on these
values, and hence no conversions are required.  There is no similar
case on ARM. Thus the conversions are unnecessary and unwise.  This
can be seen in the testcases where gcc emits both a zero-extend and a
sign-extend inside a loop, as the sign-extend is required for a
compare, and the zero-extend is required by PROMOTE_MODE.

Given Kyrill's testing with the patch and the reasonably detailed
check of the effects of code generation changes - The arm.h hunk is ok
- I do think we should make this explicit in the documentation that
TARGET_PROMOTE_MODE and TARGET_PROMOTE_FUNCTION_MODE should agree and
better still maybe put in a checking assert for the same in the
mid-end but that could be the subject of a follow-up patch.

Ok to apply just the arm.h hunk as I think Kyrill has taken care of
the testsuite fallout separately.

Hi all,

I'd like to backport the arm.h from this ( r233130) to the GCC 5
branch. As the CSE patch from my series had some fallout on x86_64
due to a deficiency in the AVX patterns that is too invasive to fix
at this stage (and presumably backport), I'd like to just backport
this arm.h fix and adjust the tests to XFAIL the fallout that comes
with not applying the CSE patch. The attached patch does that.

The code quality fallout on code outside the testsuite is not
that great. The SPEC benchmarks are not affected by not applying
the CSE change, and only a single sequence in a popular embedded
benchmark
shows some degradation for -mtune=cortex-a9 in the same way as the
wmul-1.c and wmul-2.c tests.

I think that's a fair tradeoff for fixing the wrong code bug on that
branch.

Ok to backport r233130 and the attached testsuite patch to the GCC 5
branch?

Thanks,
Kyrill

2016-02-15  Kyrylo Tkachov  

  PR target/65932
  * gcc.target/arm/wmul-1.c: Add -mtune=cortex-a9 to dg-options.
  xfail the scan-assembler test.
  * gcc.target/arm/wmul-2.c: Likewise.
  * gcc.target/arm/wmul-3.c: Simplify test to generate a single
smulbb.



Hi Kyrill,

I've noticed that wmul-3 still fails on the gcc-5 branch when forcing GCC
configuration to:
--with-cpu cortex-a5 --with-fpu vfpv3-d16-fp16
(target arm-none-linux-gnueabihf)

The generated code is:

Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2016-02-17 Thread Christophe Lyon
On 17 February 2016 at 11:05, Kyrill Tkachov
 wrote:
>
> On 17/02/16 10:03, Christophe Lyon wrote:
>>
>> On 15 February 2016 at 12:32, Kyrill Tkachov
>>  wrote:
>>>
>>> On 04/02/16 08:58, Ramana Radhakrishnan wrote:

 On Tue, Jun 30, 2015 at 2:15 AM, Jim Wilson 
 wrote:
>
> This is my suggested fix for PR 65932, which is a linux kernel
> miscompile with gcc-5.1.
>
> The problem here is caused by a chain of events.  The first is that
> the relatively new eipa_sra pass creates fake parameters that behave
> slightly differently than normal parameters.  The second is that the
> optimizer creates phi nodes that copy local variables to fake
> parameters and/or vice versa.  The third is that the out-of-ssa pass
> assumes that it can emit simple move instructions for these phi nodes.
> And the fourth is that the ARM port has a PROMOTE_MODE macro that
> forces QImode and HImode to unsigned, but a
> TARGET_PROMOTE_FUNCTION_MODE hook that does not.  So signed char and
> short parameters have different in register representations than local
> variables, and require a conversion when copying between them, a
> conversion that the out-of-ssa pass can't easily emit.
>
> Ultimately, I think this is a problem in the arm backend.  It should
> not have a PROMOTE_MODE macro that is changing the sign of char and
> short local variables.  I also think that we should merge the
> PROMOTE_MODE macro with the TARGET_PROMOTE_FUNCTION_MODE hook to
> prevent this from happening again.
>
> I see four general problems with the current ARM PROMOTE_MODE
> definition.
> 1) Unsigned char is only faster for armv5 and earlier, before the sxtb
> instruction was added.  It is a lose for armv6 and later.
> 2) Unsigned short was only faster for targets that don't support
> unaligned accesses.  Support for these targets was removed a while
> ago, and this PROMOTE_MODE hunk should have been removed at the same
> time.  It was accidentally left behind.
> 3) TARGET_PROMOTE_FUNCTION_MODE used to be a boolean hook, when it was
> converted to a function, the PROMOTE_MODE code was copied without the
> UNSIGNEDP changes.  Thus it is only an accident that
> TARGET_PROMOTE_FUNCTION_MODE and PROMOTE_MODE disagree.  Changing
> TARGET_PROMOTE_FUNCTION_MODE is an ABI change, so only PROMOTE_MODE
> changes to resolve the difference are safe.
> 4) There is a general principle that you should only change signedness
> in PROMOTE_MODE if the hardware forces it, as otherwise this results
> in extra conversion instructions that make code slower.  The mips64
> hardware for instance requires that 32-bit values be sign-extended
> regardless of type, and instructions may trap if this is not true.
> However, it has a set of 32-bit instructions that operate on these
> values, and hence no conversions are required.  There is no similar
> case on ARM. Thus the conversions are unnecessary and unwise.  This
> can be seen in the testcases where gcc emits both a zero-extend and a
> sign-extend inside a loop, as the sign-extend is required for a
> compare, and the zero-extend is required by PROMOTE_MODE.

 Given Kyrill's testing with the patch and the reasonably detailed
 check of the effects of code generation changes - The arm.h hunk is ok
 - I do think we should make this explicit in the documentation that
 TARGET_PROMOTE_MODE and TARGET_PROMOTE_FUNCTION_MODE should agree and
 better still maybe put in a checking assert for the same in the
 mid-end but that could be the subject of a follow-up patch.

 Ok to apply just the arm.h hunk as I think Kyrill has taken care of
 the testsuite fallout separately.
>>>
>>> Hi all,
>>>
>>> I'd like to backport the arm.h from this ( r233130) to the GCC 5
>>> branch. As the CSE patch from my series had some fallout on x86_64
>>> due to a deficiency in the AVX patterns that is too invasive to fix
>>> at this stage (and presumably backport), I'd like to just backport
>>> this arm.h fix and adjust the tests to XFAIL the fallout that comes
>>> with not applying the CSE patch. The attached patch does that.
>>>
>>> The code quality fallout on code outside the testsuite is not
>>> that great. The SPEC benchmarks are not affected by not applying
>>> the CSE change, and only a single sequence in a popular embedded
>>> benchmark
>>> shows some degradation for -mtune=cortex-a9 in the same way as the
>>> wmul-1.c and wmul-2.c tests.
>>>
>>> I think that's a fair tradeoff for fixing the wrong code bug on that
>>> branch.
>>>
>>> Ok to backport r233130 and the attached testsuite patch to the GCC 5
>>> branch?
>>>
>>> Thanks,
>>> Kyrill
>>>
>>> 2016-02-15  Kyrylo Tkachov  
>>>
>>>  PR target/65932
>>>  * 

Re: [PATCH][ARM][RFC] PR target/65578 Fix gcc.dg/torture/stackalign/builtin-apply-4.c for single-precision fpus

2016-02-17 Thread Kyrill Tkachov

Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00634.html

As mentioned before, this is actually a fix for PR target/69538.
I got confused when writing the cover letter and ChangeLog...

Thanks,
Kyrill

On 09/02/16 17:24, Kyrill Tkachov wrote:


On 09/02/16 17:21, Kyrill Tkachov wrote:

Hi all,

In this wrong-code PR the builtin-apply-4.c test fails with -flto but only when 
targeting an fpu
with only single-precision capabilities.

bar is a function returning a double. For non-LTO compilation the caller of bar 
reads the return value
from it from the s0 and s1 VFP registers like expected, but for -flto the 
caller seems to expect the
return value from the r0 and r1 regs.  The RTL dumps show that too.

Debugging the calls to arm_function_value show that in the -flto compilation 
the function bar is deemed
to be a local function call and assigned the ARM_PCS_AAPCS_LOCAL PCS variant, 
whereas for the non-LTO (and non-breaking)
compilation it uses the ARM_PCS_AAPCS_VFP variant.

Further down in use_vfp_abi when deciding whether to use VFP registers for the 
result there is a bit of
logic that rejects VFP registers when handling the ARM_PCS_AAPCS_LOCAL variant 
with a double precision value
on an FPU that is not TARGET_VFP_DOUBLE.

This seems wrong for ARM_PCS_AAPCS_LOCAL to me. ARM_PCS_AAPCS_LOCAL means that 
the function doesn't escape
the translation unit and we can thus use whatever variant we want. From what I 
understand we want to use the
VFP regs when possible for FP values.

So this patch removes that restriction and for the testcase the caller of bar 
correctly reads the return
value of bar from the VFP registers and everything works.

This patch has been bootstrapped and tested on arm-none-linux-gnueabihf 
configured with --with-fpu=fpv4-sp-d16.
The bootstrap was performed with LTO.
I didn't see any regressions.

It seems that this logic was put there in 2009 with r154034 as part of a large 
patch to enable support for half-precision
floating point.

I'm not very familiar with this part of the code, so is this a safe patch to do?
The patch should only ever change behaviour for single-precision-only fpus and 
only for static functions
that don't get called outside their translation units (or during LTO I suppose) 
so there shouldn't
be any ABI problems, I think.

Is this ok for trunk?

Thanks,
Kyrill



Huh, I just realised I wrote completely the wrong PR number on this.
The PR I'm talking about here is PR target/69538

Sorry for the confusion.

Kyrill



2016-02-09 Kyrylo Tkachov 

PR target/65578
* config/arm/arm.c (use_vfp_abi): Remove is_double argument.
Don't check for is_double and TARGET_VFP_DOUBLE.
(aapcs_vfp_is_call_or_return_candidate): Update callsite.
(aapcs_vfp_is_return_candidate): Likewise.
(aapcs_vfp_is_call_candidate): Likewise.
(aapcs_vfp_allocate_return_reg): Likewise.






Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2016-02-17 Thread Kyrill Tkachov


On 17/02/16 10:03, Christophe Lyon wrote:

On 15 February 2016 at 12:32, Kyrill Tkachov
 wrote:

On 04/02/16 08:58, Ramana Radhakrishnan wrote:

On Tue, Jun 30, 2015 at 2:15 AM, Jim Wilson  wrote:

This is my suggested fix for PR 65932, which is a linux kernel
miscompile with gcc-5.1.

The problem here is caused by a chain of events.  The first is that
the relatively new eipa_sra pass creates fake parameters that behave
slightly differently than normal parameters.  The second is that the
optimizer creates phi nodes that copy local variables to fake
parameters and/or vice versa.  The third is that the out-of-ssa pass
assumes that it can emit simple move instructions for these phi nodes.
And the fourth is that the ARM port has a PROMOTE_MODE macro that
forces QImode and HImode to unsigned, but a
TARGET_PROMOTE_FUNCTION_MODE hook that does not.  So signed char and
short parameters have different in register representations than local
variables, and require a conversion when copying between them, a
conversion that the out-of-ssa pass can't easily emit.

Ultimately, I think this is a problem in the arm backend.  It should
not have a PROMOTE_MODE macro that is changing the sign of char and
short local variables.  I also think that we should merge the
PROMOTE_MODE macro with the TARGET_PROMOTE_FUNCTION_MODE hook to
prevent this from happening again.

I see four general problems with the current ARM PROMOTE_MODE definition.
1) Unsigned char is only faster for armv5 and earlier, before the sxtb
instruction was added.  It is a lose for armv6 and later.
2) Unsigned short was only faster for targets that don't support
unaligned accesses.  Support for these targets was removed a while
ago, and this PROMOTE_MODE hunk should have been removed at the same
time.  It was accidentally left behind.
3) TARGET_PROMOTE_FUNCTION_MODE used to be a boolean hook, when it was
converted to a function, the PROMOTE_MODE code was copied without the
UNSIGNEDP changes.  Thus it is only an accident that
TARGET_PROMOTE_FUNCTION_MODE and PROMOTE_MODE disagree.  Changing
TARGET_PROMOTE_FUNCTION_MODE is an ABI change, so only PROMOTE_MODE
changes to resolve the difference are safe.
4) There is a general principle that you should only change signedness
in PROMOTE_MODE if the hardware forces it, as otherwise this results
in extra conversion instructions that make code slower.  The mips64
hardware for instance requires that 32-bit values be sign-extended
regardless of type, and instructions may trap if this is not true.
However, it has a set of 32-bit instructions that operate on these
values, and hence no conversions are required.  There is no similar
case on ARM. Thus the conversions are unnecessary and unwise.  This
can be seen in the testcases where gcc emits both a zero-extend and a
sign-extend inside a loop, as the sign-extend is required for a
compare, and the zero-extend is required by PROMOTE_MODE.

Given Kyrill's testing with the patch and the reasonably detailed
check of the effects of code generation changes - The arm.h hunk is ok
- I do think we should make this explicit in the documentation that
TARGET_PROMOTE_MODE and TARGET_PROMOTE_FUNCTION_MODE should agree and
better still maybe put in a checking assert for the same in the
mid-end but that could be the subject of a follow-up patch.

Ok to apply just the arm.h hunk as I think Kyrill has taken care of
the testsuite fallout separately.

Hi all,

I'd like to backport the arm.h from this ( r233130) to the GCC 5
branch. As the CSE patch from my series had some fallout on x86_64
due to a deficiency in the AVX patterns that is too invasive to fix
at this stage (and presumably backport), I'd like to just backport
this arm.h fix and adjust the tests to XFAIL the fallout that comes
with not applying the CSE patch. The attached patch does that.

The code quality fallout on code outside the testsuite is not
that great. The SPEC benchmarks are not affected by not applying
the CSE change, and only a single sequence in a popular embedded benchmark
shows some degradation for -mtune=cortex-a9 in the same way as the
wmul-1.c and wmul-2.c tests.

I think that's a fair tradeoff for fixing the wrong code bug on that branch.

Ok to backport r233130 and the attached testsuite patch to the GCC 5 branch?

Thanks,
Kyrill

2016-02-15  Kyrylo Tkachov  

 PR target/65932
 * gcc.target/arm/wmul-1.c: Add -mtune=cortex-a9 to dg-options.
 xfail the scan-assembler test.
 * gcc.target/arm/wmul-2.c: Likewise.
 * gcc.target/arm/wmul-3.c: Simplify test to generate a single smulbb.



Hi Kyrill,

I've noticed that wmul-3 still fails on the gcc-5 branch when forcing GCC
configuration to:
--with-cpu cortex-a5 --with-fpu vfpv3-d16-fp16
(target arm-none-linux-gnueabihf)

The generated code is:
 sxth    r0, r0
 sxth    r1, r1
 mul r0, r1, r0
instead of
 smulbb  r0, r1, r0
on trunk.

I guess 

Re: [PATCH, ARM] stop changing signedness in PROMOTE_MODE

2016-02-17 Thread Christophe Lyon
On 15 February 2016 at 12:32, Kyrill Tkachov
 wrote:
>
> On 04/02/16 08:58, Ramana Radhakrishnan wrote:
>>
>> On Tue, Jun 30, 2015 at 2:15 AM, Jim Wilson  wrote:
>>>
>>> This is my suggested fix for PR 65932, which is a linux kernel
>>> miscompile with gcc-5.1.
>>>
>>> The problem here is caused by a chain of events.  The first is that
>>> the relatively new eipa_sra pass creates fake parameters that behave
>>> slightly differently than normal parameters.  The second is that the
>>> optimizer creates phi nodes that copy local variables to fake
>>> parameters and/or vice versa.  The third is that the out-of-ssa pass
>>> assumes that it can emit simple move instructions for these phi nodes.
>>> And the fourth is that the ARM port has a PROMOTE_MODE macro that
>>> forces QImode and HImode to unsigned, but a
>>> TARGET_PROMOTE_FUNCTION_MODE hook that does not.  So signed char and
>>> short parameters have different in register representations than local
>>> variables, and require a conversion when copying between them, a
>>> conversion that the out-of-ssa pass can't easily emit.
>>>
>>> Ultimately, I think this is a problem in the arm backend.  It should
>>> not have a PROMOTE_MODE macro that is changing the sign of char and
>>> short local variables.  I also think that we should merge the
>>> PROMOTE_MODE macro with the TARGET_PROMOTE_FUNCTION_MODE hook to
>>> prevent this from happening again.
>>>
>>> I see four general problems with the current ARM PROMOTE_MODE definition.
>>> 1) Unsigned char is only faster for armv5 and earlier, before the sxtb
>>> instruction was added.  It is a lose for armv6 and later.
>>> 2) Unsigned short was only faster for targets that don't support
>>> unaligned accesses.  Support for these targets was removed a while
>>> ago, and this PROMOTE_MODE hunk should have been removed at the same
>>> time.  It was accidentally left behind.
>>> 3) TARGET_PROMOTE_FUNCTION_MODE used to be a boolean hook, when it was
>>> converted to a function, the PROMOTE_MODE code was copied without the
>>> UNSIGNEDP changes.  Thus it is only an accident that
>>> TARGET_PROMOTE_FUNCTION_MODE and PROMOTE_MODE disagree.  Changing
>>> TARGET_PROMOTE_FUNCTION_MODE is an ABI change, so only PROMOTE_MODE
>>> changes to resolve the difference are safe.
>>> 4) There is a general principle that you should only change signedness
>>> in PROMOTE_MODE if the hardware forces it, as otherwise this results
>>> in extra conversion instructions that make code slower.  The mips64
>>> hardware for instance requires that 32-bit values be sign-extended
>>> regardless of type, and instructions may trap if this is not true.
>>> However, it has a set of 32-bit instructions that operate on these
>>> values, and hence no conversions are required.  There is no similar
>>> case on ARM. Thus the conversions are unnecessary and unwise.  This
>>> can be seen in the testcases where gcc emits both a zero-extend and a
>>> sign-extend inside a loop, as the sign-extend is required for a
>>> compare, and the zero-extend is required by PROMOTE_MODE.
>>
>> Given Kyrill's testing with the patch and the reasonably detailed
>> check of the effects of code generation changes - The arm.h hunk is ok
>> - I do think we should make this explicit in the documentation that
>> TARGET_PROMOTE_MODE and TARGET_PROMOTE_FUNCTION_MODE should agree and
>> better still maybe put in a checking assert for the same in the
>> mid-end but that could be the subject of a follow-up patch.
>>
>> Ok to apply just the arm.h hunk as I think Kyrill has taken care of
>> the testsuite fallout separately.
>
> Hi all,
>
> I'd like to backport the arm.h from this ( r233130) to the GCC 5
> branch. As the CSE patch from my series had some fallout on x86_64
> due to a deficiency in the AVX patterns that is too invasive to fix
> at this stage (and presumably backport), I'd like to just backport
> this arm.h fix and adjust the tests to XFAIL the fallout that comes
> with not applying the CSE patch. The attached patch does that.
>
> The code quality fallout on code outside the testsuite is not
> that great. The SPEC benchmarks are not affected by not applying
> the CSE change, and only a single sequence in a popular embedded benchmark
> shows some degradation for -mtune=cortex-a9 in the same way as the
> wmul-1.c and wmul-2.c tests.
>
> I think that's a fair tradeoff for fixing the wrong code bug on that branch.
>
> Ok to backport r233130 and the attached testsuite patch to the GCC 5 branch?
>
> Thanks,
> Kyrill
>
> 2016-02-15  Kyrylo Tkachov  
>
> PR target/65932
> * gcc.target/arm/wmul-1.c: Add -mtune=cortex-a9 to dg-options.
> xfail the scan-assembler test.
> * gcc.target/arm/wmul-2.c: Likewise.
> * gcc.target/arm/wmul-3.c: Simplify test to generate a single smulbb.
>
>
Hi Kyrill,

I've noticed that wmul-3 still fails on the gcc-5 branch when forcing GCC
configuration to:
--with-cpu 

Commit: MSP430: Update devices list

2016-02-17 Thread Nick Clifton
Hi Guys,

  I am checking in this patch to update the list of MSP430 devices
  built in to the msp430 backend.

Cheers
  Nick

gcc/ChangeLog
2016-02-17  Nick Clifton  

* config/msp430/msp430.c (msp430_mcu_data): Sync with data from
TI's devices.csv file as of March 2016.

Index: gcc/config/msp430/msp430.c
===
--- gcc/config/msp430/msp430.c  (revision 233486)
+++ gcc/config/msp430/msp430.c  (working copy)
@@ -90,10 +90,10 @@
 #define TARGET_OPTION_OVERRIDE msp430_option_override
 
 /* This is a copy of the same data structure found in gas/config/tc-msp430.c
-   Also another (sort-of) copy can be found in 
gcc/config/msp430/devices-msp430.c
+   Also another (sort-of) copy can be found in gcc/config/msp430/t-msp430
Keep these three structures in sync.
The data in this structure has been extracted from the devices.csv file
-   released by TI, updated as of 8 October 2015.  */
+   released by TI, updated as of March 2016.  */
 
 struct msp430_mcu_data
 {
@@ -519,7 +519,13 @@
   { "msp430fg6626",2,8 },
   { "msp430fr2032",2,0 },
   { "msp430fr2033",2,0 },
+  { "msp430fr2310",2,0 },
+  { "msp430fr2311",2,0 },
   { "msp430fr2433",2,8 },
+  { "msp430fr2532",2,8 },
+  { "msp430fr2533",2,8 },
+  { "msp430fr2632",2,8 },
+  { "msp430fr2633",2,8 },
   { "msp430fr2xx_4xxgeneric",2,8 },
   { "msp430fr4131",2,0 },
   { "msp430fr4132",2,0 },
@@ -553,6 +559,8 @@
   { "msp430fr5858",2,8 },
   { "msp430fr5859",2,8 },
   { "msp430fr5867",2,8 },
+  { "msp430fr5862",2,8 },
+  { "msp430fr5864",2,8 },
   { "msp430fr58671",2,8 },
   { "msp430fr5868",2,8 },
   { "msp430fr5869",2,8 },
@@ -563,6 +571,8 @@
   { "msp430fr5888",2,8 },
   { "msp430fr5889",2,8 },
   { "msp430fr58891",2,8 },
+  { "msp430fr5892",2,8 },
+  { "msp430fr5894",2,8 },
   { "msp430fr5922",2,8 },
   { "msp430fr59221",2,8 },
   { "msp430fr5947",2,8 },
@@ -572,6 +582,8 @@
   { "msp430fr5957",2,8 },
   { "msp430fr5958",2,8 },
   { "msp430fr5959",2,8 },
+  { "msp430fr5962",2,8 },
+  { "msp430fr5964",2,8 },
   { "msp430fr5967",2,8 },
   { "msp430fr5968",2,8 },
   { "msp430fr5969",2,8 },
@@ -584,6 +596,8 @@
   { "msp430fr5988",2,8 },
   { "msp430fr5989",2,8 },
   { "msp430fr59891",2,8 },
+  { "msp430fr5992",2,8 },
+  { "msp430fr5994",2,8 },
   { "msp430fr5xx_6xxgeneric",2,8 },
   { "msp430fr6820",2,8 },
   { "msp430fr6822",2,8 },


[Ada] Fix missing synchronization for Atomic_Components on array object

2016-02-17 Thread Eric Botcazou
This plugs a small hole in the implementation of atomic synchronization: the
compiler fails to enforce it if a pragma/aspect Has_Atomic_Components is put
directly on an array object instead of an array type.  It's not a regression 
but the issue is annoying and the fix is trivial.

Tested on x86_64-suse-linux, applied on the mainline and 5 branch.


2016-02-17  Eric Botcazou  

* exp_ch4.adb (Expand_N_Indexed_Component): Activate synchronization if
the prefix denotes an entity which Has_Atomic_Components.
* gcc-interface/trans.c (node_is_atomic): Return true if the prefix
denotes an entity which Has_Atomic_Components.


2016-02-17  Eric Botcazou  

* gnat.dg/atomic8.adb: New test.

-- 
Eric Botcazou

-- { dg-do run }

--  Run-time test: two tasks concurrently update distinct components of an
--  array object that carries the Atomic_Components aspect, then the main
--  procedure checks the final sum of the two components.
procedure Atomic8 is

   --  The aspect is placed directly on the array object (an anonymous array
   --  type), not on a named array type.
   V : array (1 .. 2) of Natural := (0,0) with Atomic_Components;

   --  TT1 only ever writes V (1); it reads both components in its loop test.
   task type TT1;
   task body TT1 is
   begin
  while V (1) + V (2) < 1_000_000 loop
 V (1) := V (1) + 1;
  end loop;
   end TT1;

   --  TT2 mirrors TT1 but only ever writes V (2).
   task type TT2;
   task body TT2 is
   begin
  while V (1) + V (2) < 1_000_000 loop
 V (2) := V (2) + 1;
  end loop;
   end TT2;

begin
   --  The tasks are declared in an inner block so that the block is only
   --  left once both T1 and T2 have terminated (a block waits for the tasks
   --  it declares before completing).
   declare
  T1 : TT1;
  T2 : TT2;
   begin
  null;
   end;
   --  Accept either exactly 1_000_000 or an overshoot of one; presumably the
   --  overshoot covers both tasks passing the loop test at 999_999 and each
   --  then incrementing its own component.  Any other total fails the test.
   if V (1) + V (2) not in 1_000_000 | 1_000_001 then
  raise Program_Error;
   end if;
end;
Index: exp_ch4.adb
===
--- exp_ch4.adb	(revision 233448)
+++ exp_ch4.adb	(working copy)
@@ -6,7 +6,7 @@
 --  --
 -- B o d y  --
 --  --
---  Copyright (C) 1992-2015, Free Software Foundation, Inc. --
+--  Copyright (C) 1992-2016, Free Software Foundation, Inc. --
 --  --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
@@ -6269,6 +6269,9 @@ package body Exp_Ch4 is
and then not Atomic_Synchronization_Disabled (Atp))
 or else (Is_Atomic (Typ)
   and then not Atomic_Synchronization_Disabled (Typ))
+or else (Is_Entity_Name (P)
+  and then Has_Atomic_Components (Entity (P))
+  and then not Atomic_Synchronization_Disabled (Entity (P)))
   then
  Activate_Atomic_Synchronization (N);
   end if;
Index: gcc-interface/trans.c
===
--- gcc-interface/trans.c	(revision 233448)
+++ gcc-interface/trans.c	(working copy)
@@ -4028,6 +4028,9 @@ node_is_atomic (Node_Id gnat_node)
 case N_Indexed_Component:
   if (Has_Atomic_Components (Etype (Prefix (gnat_node
 	return true;
+  if (Is_Entity_Name (Prefix (gnat_node))
+	  && Has_Atomic_Components (Entity (Prefix (gnat_node
+	return true;
 
   /* ... fall through ... */
 


Re: [PING, Patch, fortran, pr67451, v1] [5/6 Regression] ICE with sourced allocation from coarray

2016-02-17 Thread Andre Vehreschild
PING !

On Wed, 10 Feb 2016 12:26:24 +0100
Andre Vehreschild  wrote:

> Hi all,
> 
> unfortunately my last patch for pr67451 was not perfect and introduced
> regressions occurring on s390(x) and with the sanitizer. These were
> caused because, when taking the array specs from the source=-expression,
> its attributes, like coarray state and so on, were also taken from
> there. This additionally added a corank to local objects to allocate
> that were not coarrays, overwriting data in the array handle. The attached
> patch fixes both issues.
> 
> The patch for gcc-5 is not affected, because in gcc-5 the feature of
> taking the array spec from the source=-expression is not implemented.
> 
> Bootstrapped and regtested ok on x86_64-linux-gnu/F23.
> 
> Ok for trunk?
> 
> Regards,
>   Andre
> 
> On Tue, 2 Feb 2016 19:24:46 +0100
> Paul Richard Thomas  wrote:
> 
> > Hi Andre,
> > 
> > This looks to be OK for trunk.
> > 
> > I'll move to the 5-branch patch right away.
> > 
> > Thanks
> > 
> > Paul
> > 
> > On 29 January 2016 at 19:17, Andre Vehreschild  wrote:  
> > > Hi all,
> > >
> > > attached is a patch to fix a regression in current gfortran when a
> > > coarray is used in the source=-expression of an allocate(). The ICE was
> > > caused by the class information, i.e., _vptr and so on, not at the
> > > expected place. The patch fixes this.
> > >
> > > The patch also fixes pr69418, which I will flag as a duplicate in a
> > > second.
> > >
> > > Bootstrapped and regtested ok on x86_64-linux-gnu/F23.
> > >
> > > Ok for trunk?
> > >
> > > Backport to gcc-5 is pending, albeit more difficult, because the
> > > allocate() implementation on 5 is not as advanced as the one in 6.
> > >
> > > Regards,
> > > Andre
> > > --
> > > Andre Vehreschild * Email: vehre ad gmx dot de
> > 
> > 
> >   
> 
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


[PATCH] Fix PR69553

2016-02-17 Thread Richard Biener

Honza added skipping for the (broken) type-comparing code in
operand_equal_p for OEP_ADDRESS_OF.  This exposes the fact that
both IMAGPART_EXPR and ARRAY_REF lack comparison of their offsetting
effect.

In fact the issue is somewhat latent as for example nothing
verified before that we don't compare two ARRAY_REFs with different
low bound.  Looking at TYPE_PRECISION or TYPE_UNSIGNED for ARRAY_TYPEs
certainly doesn't do this properly.

So the following adds proper comparison of offsetting effects.
Unfortunately due to PLACEHOLDER_EXPR handling both
array_ref_low_bound and array_ref_element_size are mutating (ugh)
and probably more costly than needed.  COMPONENT_REF gets away
with comparing FIELD_DECLs for pointer equality so eventually
we could get away with comparing TYPE_DOMAIN for pointer equality
as well as TYPE_SIZE.  In the end we should require ops 2 and 3
to be always present (even if integer constants), that would simplify
things a lot.  Or maybe have simpler variants for after gimplification.
Or remove PLACEHOLDER_EXPR from GENERIC and force Ada to lower those.

Well.

The following patch at least fixes the wrong-code issue Honza exposed.

We still need to fix the bogus type compatibility check, but that
doesn't look like a regression.  Maybe sth like

Index: gcc/fold-const.c
===
*** gcc/fold-const.c(revision 233447)
--- gcc/fold-const.c(working copy)
*** operand_equal_p (const_tree arg0, const_
*** 2779,2784 
--- 2779,2790 
  
if (!(flags & OEP_ADDRESS_OF))
  {
+   if (AGGREGATE_TYPE_P (TREE_TYPE (arg0))
+ != AGGREGATE_TYPE_P (TREE_TYPE (arg1)))
+   return 0;
+ 
+   if (! AGGREGATE_TYPE_P (TREE_TYPE (arg0)))
+   {
  /* If both types don't have the same signedness, then we can't 
consider
 them equal.  We must check this before the STRIP_NOPS calls
 because they may change the signedness of the arguments.  As 
pointers
*** operand_equal_p (const_tree arg0, const_
*** 2798,2803 
--- 2804,2810 
  STRIP_NOPS (arg0);
  STRIP_NOPS (arg1);
}
+ }
  #if 0
/* FIXME: Fortran FE currently produce ADDR_EXPR of NOP_EXPR. Enable 
the
   sanity check once the issue is solved.  */

but well...

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

I'm not too happy about calling the mutating array_ref_low_bound
or array_ref_element_size here but I'm not sure what reliable
alternative we'd have (compare TYPE_SIZE and TYPE_DOMAIN TYPE_MIN_VALUE?).

Richard.

2016-02-17  Richard Biener  

PR middle-end/69553
* fold-const.c (operand_equal_p): Properly compare offsets for
IMAGPART_EXPR and ARRAY_REF.

* g++.dg/torture/pr69553.C: New testcase.

Index: gcc/fold-const.c
===
*** gcc/fold-const.c(revision 233483)
--- gcc/fold-const.c(working copy)
*** operand_equal_p (const_tree arg0, const_
*** 3008,3015 
  flags &= ~OEP_ADDRESS_OF;
  return OP_SAME (0);
  
-   case REALPART_EXPR:
case IMAGPART_EXPR:
case VIEW_CONVERT_EXPR:
  return OP_SAME (0);
  
--- 3008,3022 
  flags &= ~OEP_ADDRESS_OF;
  return OP_SAME (0);
  
case IMAGPART_EXPR:
+ /* Require the same offset.  */
+ if (!operand_equal_p (TYPE_SIZE (TREE_TYPE (arg0)),
+   TYPE_SIZE (TREE_TYPE (arg1)),
+   flags & ~OEP_ADDRESS_OF))
+   return 0;
+ 
+   /* Fallthru.  */
+   case REALPART_EXPR:
case VIEW_CONVERT_EXPR:
  return OP_SAME (0);
  
*** operand_equal_p (const_tree arg0, const_
*** 3049,3065 
  
case ARRAY_REF:
case ARRAY_RANGE_REF:
- /* Operands 2 and 3 may be null.
-Compare the array index by value if it is constant first as we
-may have different types but same value here.  */
  if (!OP_SAME (0))
return 0;
  flags &= ~OEP_ADDRESS_OF;
  return ((tree_int_cst_equal (TREE_OPERAND (arg0, 1),
   TREE_OPERAND (arg1, 1))
   || OP_SAME (1))
  && OP_SAME_WITH_NULL (2)
! && OP_SAME_WITH_NULL (3));
  
case COMPONENT_REF:
  /* Handle operand 2 the same as for ARRAY_REF.  Operand 0
--- 3056,3084 
  
case ARRAY_REF:
case ARRAY_RANGE_REF:
  if (!OP_SAME (0))
return 0;
  flags &= ~OEP_ADDRESS_OF;
+ /* Compare the array index by value if it is constant first as we
+may have different types but same value here.  */
  return ((tree_int_cst_equal (TREE_OPERAND (arg0, 1),
   TREE_OPERAND (arg1, 1))
   || OP_SAME (1))
  

[Ada] Fix ICE on element of array with enumeration index

2016-02-17 Thread Eric Botcazou
This is a regression present on the mainline.  The compiler aborts on the 
element of an array whose index is an enumeration type with representation 
clause and whose component contains a small discriminated type with default 
discriminant, because of a spurious SAVE_EXPR created by gigi.

Tested on x86_64-suse-linux, applied on the mainline.


2016-02-17  Eric Botcazou  

* gcc-interface/utils2.c (gnat_protect_expr): Make a SAVE_EXPR only
for fat pointer or scalar types.


2016-02-17  Eric Botcazou  

* gnat.dg/discr46.ad[sb]: New test.

-- 
Eric Botcazou

Index: gcc-interface/utils2.c
===
--- gcc-interface/utils2.c	(revision 233448)
+++ gcc-interface/utils2.c	(working copy)
@@ -2559,12 +2559,11 @@ gnat_protect_expr (tree exp)
 return build3 (code, type, gnat_protect_expr (TREE_OPERAND (exp, 0)),
 		   TREE_OPERAND (exp, 1), TREE_OPERAND (exp, 2));
 
-  /* If this is a fat pointer or something that can be placed in a register,
- just make a SAVE_EXPR.  Likewise for a CALL_EXPR as large objects are
- returned via invisible reference in most ABIs so the temporary will
- directly be filled by the callee.  */
+  /* If this is a fat pointer or a scalar, just make a SAVE_EXPR.  Likewise
+ for a CALL_EXPR as large objects are returned via invisible reference
+ in most ABIs so the temporary will directly be filled by the callee.  */
   if (TYPE_IS_FAT_POINTER_P (type)
-  || TYPE_MODE (type) != BLKmode
+  || !AGGREGATE_TYPE_P (type)
   || code == CALL_EXPR)
 return save_expr (exp);
 
-- { dg-do compile }

--  Body of the Discr46 compile-only test.
package body Discr46 is

   --  Return the T component of the record stored in A (Id) when that
   --  record's discriminant D is True; otherwise return 0.
   function F (Id : Enum) return Integer is
  Node : Integer := 0;  --  result when the True variant is not present
   begin
      --  A is indexed twice by the same Id: once to read the discriminant
      --  and once to read the variant component T.
  if A (Id).R.D = True then
 Node := A (Id).R.T;
  end if;
  return Node;
   end; 

end Discr46;
--  Spec of the Discr46 compile-only test: an array indexed by an enumeration
--  type with a representation clause, whose components contain a small
--  discriminated record with a default discriminant.
package Discr46 is

   --  Enumeration with an explicit representation clause (codes 1, 2, 3).
   type Enum is (One, Two, Three);
   for Enum use (One => 1, Two => 2, Three => 3);

   --  Discriminated record with a default discriminant; the component T
   --  exists only in the D = True variant.
   type Rec1 (D : Boolean := False) is record
  case D is
 when False => null;
 when True => T : Integer;
  end case;
   end record;

   --  Wrapper record: a Rec1 (declared without a discriminant constraint)
   --  followed by a Character.
   type Rec2 is record
  R : Rec1;
  C : Character;
   end record;

   --  Array whose index type is the enumeration declared above.
   type Arr is array (Enum) of Rec2;

   --  Library-level array object read by F.
   A : Arr; 

   --  Returns A (Id).R.T when A (Id).R.D is True, otherwise 0 (see the body).
   function F (Id : Enum) return Integer;  

end Discr46;


Re: [6 Regession] Usage of unitialized pointer io/list_read.c (

2016-02-17 Thread Christophe Lyon
On 16 February 2016 at 18:49, Jerry DeLisle  wrote:
> On 02/16/2016 12:06 AM, Christophe Lyon wrote:
>> On 15 February 2016 at 23:16, Janne Blomqvist  
>> wrote:
>>> On Mon, Feb 15, 2016 at 11:45 PM, Jerry DeLisle  
>>> wrote:
 The title of the PR should be "Mishandling of namelist comments" or
 "Interpreting '!' as a comment in non-namelist reads".
> --- snip ---
>>
>> Although OK in trunk, I've noticed regressions in the gcc-5 branch
>> since you committed
>> r233442:
>>
>
> There is a subsequent commit that updates those two failing cases,
> namelist_38.f90 and namelist_84.f90.
>
> Please check that you have those updates and let me know if they still fail.
>
Indeed, the subsequent commit fixed them, thanks.

> Regards,
>
> Jerry
>