Re: [PR84620] output symbolic entry_view as data2, not addr

2018-03-09 Thread Alexandre Oliva
On Mar  9, 2018, Jakub Jelinek  wrote:

> Also, I'd say there should be
>   symview_upper_bound = 0;
> in dwarf2out_c_finalize for better behavior of GCC JIT.

Thanks.  I added code to clear zero_view_p too.

Here's what I'm checking in.


[IEPM] [PR debug/84620] use constant form for DW_AT_GNU_entry_view

When outputting entry views in symbolic mode, we used to use a lbl_id,
but that outputs the view as an addr, perhaps even in an indirect one,
which is all excessive and undesirable for a small assembler-computed
constant.

Introduce a new value class for symbolic views, so that we can output
the labels as constant data, using as narrow forms as possible, but
wide enough for any symbolic views output in the compilation.  We
don't know exactly where the assembler will reset views, but we count
the symbolic views since known reset points and use that as an upper
bound for view numbers.

Ideally, we'd use uleb128, but then the compiler would have to defer
.debug_info offset computation to the assembler.  I'm not going there
for now, so a symbolic uleb128 assembler constant in an attribute is
not something GCC can deal with ATM.

for  gcc/ChangeLog

PR debug/84620
* dwarf2out.h (dw_val_class): Add dw_val_class_symview.
(dw_val_node): Add val_symbolic_view.
* dwarf2out.c (dw_line_info_table): Add symviews_since_reset.
(symview_upper_bound): New.
(new_line_info_table): Initialize symviews_since_reset.
(dwarf2out_source_line): Count symviews_since_reset and set
symview_upper_bound.
(dw_val_equal_p): Handle symview.
(add_AT_symview): New.
(print_dw_val): Handle symview.
(attr_checksum, attr_checksum_ordered): Likewise.
(same_dw_val_p, size_of_die): Likewise.
(value_format, output_die): Likewise.
(add_high_low_attributes): Use add_AT_symview for entry_view.
(dwarf2out_finish): Reset symview_upper_bound, clear
zero_view_p.
---
 gcc/dwarf2out.c |  102 ---
 gcc/dwarf2out.h |4 ++
 2 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index c39910f26cb3..4e6ee5e8f82e 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1434,6 +1434,8 @@ dw_val_equal_p (dw_val_node *a, dw_val_node *b)
   return a->v.val_die_ref.die == b->v.val_die_ref.die;
 case dw_val_class_fde_ref:
   return a->v.val_fde_index == b->v.val_fde_index;
+case dw_val_class_symview:
+  return strcmp (a->v.val_symbolic_view, b->v.val_symbolic_view) == 0;
 case dw_val_class_lbl_id:
 case dw_val_class_lineptr:
 case dw_val_class_macptr:
@@ -2951,6 +2953,11 @@ struct GTY(()) dw_line_info_table {
  going to ask the assembler to assign.  */
   var_loc_view view;
 
+  /* This counts the number of symbolic views emitted in this table
+ since the latest view reset.  Its max value, over all tables,
+ sets symview_upper_bound.  */
+  var_loc_view symviews_since_reset;
+
 #define FORCE_RESET_NEXT_VIEW(x) ((x) = (var_loc_view)-1)
 #define RESET_NEXT_VIEW(x) ((x) = (var_loc_view)0)
 #define FORCE_RESETTING_VIEW_P(x) ((x) == (var_loc_view)-1)
@@ -2959,6 +2966,13 @@ struct GTY(()) dw_line_info_table {
   vec *entries;
 };
 
+/* This is an upper bound for view numbers that the assembler may
+   assign to symbolic views output in this translation.  It is used to
+   decide how big a field to use to represent view numbers in
+   symview-classed attributes.  */
+
+static var_loc_view symview_upper_bound;
+
 /* If we're keep track of location views and their reset points, and
INSN is a reset point (i.e., it necessarily advances the PC), mark
the next view in TABLE as reset.  */
@@ -3603,6 +3617,7 @@ static addr_table_entry *add_addr_table_entry (void *, 
enum ate_kind);
 static void remove_addr_table_entry (addr_table_entry *);
 static void add_AT_addr (dw_die_ref, enum dwarf_attribute, rtx, bool);
 static inline rtx AT_addr (dw_attr_node *);
+static void add_AT_symview (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_lineptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_macptr (dw_die_ref, enum dwarf_attribute, const char *);
@@ -5117,6 +5132,21 @@ add_AT_vms_delta (dw_die_ref die, enum dwarf_attribute 
attr_kind,
   add_dwarf_attr (die, );
 }
 
+/* Add a symbolic view identifier attribute value to a DIE.  */
+
+static inline void
+add_AT_symview (dw_die_ref die, enum dwarf_attribute attr_kind,
+   const char *view_label)
+{
+  dw_attr_node attr;
+
+  attr.dw_attr = attr_kind;
+  attr.dw_attr_val.val_class = dw_val_class_symview;
+  attr.dw_attr_val.val_entry = NULL;
+  attr.dw_attr_val.v.val_symbolic_view = xstrdup (view_label);
+  add_dwarf_attr (die, );
+}
+
 /* Add a label identifier attribute value to a DIE.  

[PATCH] PR fortran/84734 -- Fix ICE on invalid code

2018-03-09 Thread Steve Kargl
In fixing PR fortran/83633, it seems the patch I committed
introduced an ICE for nonsensical invalid Fortran.  The 
attached patch cures the ICE and now (re)issues an error
message.

The basic problem seems to boil down to the recursive 
calling of gfc_simplify_expr reduces "huge(1_8)+1_8" to
"constant + constant".  When the chain of gfc_simplify_expr
tries to reduces this expression an overflow occurs.  An
error message is queud but never emitted, and the result
is set to NULL and both constants are freed.  The NULL is
passed back up through the chain of gfc_simplify_expr.
At some point that NULL pointer is referenced.  The patch
works around the problem by passing the result with the
overflow value up the chain.  

Regression tested on x86_64-*-freebsd.  I intend to commit
this patch tomorrow, which on my clock is only 2.75 hours
away.

2018-03-09  Steven G. Kargl  

PR fortran/84734
* arith.c (check_result, eval_intrinsic):  If result overflows, pass
the expression up the chain instead of a NULL pointer.

2018-03-09  Steven G. Kargl  

PR fortran/84734
* gfortran.dg/pr84734.f90: New test.

-- 
Steve


Re: [PATCH] PR fortran/84734 -- Fix ICE on invalid code

2018-03-09 Thread Steve Kargl
On Fri, Mar 09, 2018 at 09:13:10PM -0800, Steve Kargl wrote:
> In fixing PR fortran/83633, it seems the patch I committed
> introduced an ICE for nonsensical invalid Fortran.  The 
> attached patch cures the ICE and now (re)issues an error
> message.
> 
> The basic problem seems to boil down to the recursive 
> calling of gfc_simplify_expr reduces "huge(1_8)+1_8" to
> "constant + constant".  When the chain of gfc_simplify_expr
> tries to reduces this expression an overflow occurs.  An
> error message is queud but never emitted, and the result
> is set to NULL and both constants are freed.  The NULL is
> passed back up through the chain of gfc_simplify_expr.
> At some point that NULL pointer is referenced.  The patch
> works around the problem by passing the result with the
> overflow value up the chain.  
> 
> Regression tested on x86_64-*-freebsd.  I intend to commit
> this patch tomorrow, which on my clock is only 2.75 hours
> away.
> 
> 2018-03-09  Steven G. Kargl  
> 
> PR fortran/84734
> * arith.c (check_result, eval_intrinsic):  If result overflows, pass
> the expression up the chain instead of a NULL pointer.
> 
> 2018-03-09  Steven G. Kargl  
> 
> PR fortran/84734
> * gfortran.dg/pr84734.f90: New test.
> 

Now with an attached patch.

-- 
Steve
Index: gcc/fortran/arith.c
===
--- gcc/fortran/arith.c	(revision 258367)
+++ gcc/fortran/arith.c	(working copy)
@@ -555,10 +555,10 @@ check_result (arith rc, gfc_expr *x, gfc_expr *r, gfc_
   val = ARITH_OK;
 }
 
-  if (val != ARITH_OK)
-gfc_free_expr (r);
-  else
+  if (val == ARITH_OK || val == ARITH_OVERFLOW)
 *rp = r;
+  else
+gfc_free_expr (r);
 
   return val;
 }
@@ -1603,8 +1603,12 @@ eval_intrinsic (gfc_intrinsic_op op,
   if (rc != ARITH_OK)
 {
   gfc_error (gfc_arith_error (rc), >where);
+  if (rc == ARITH_OVERFLOW)
+	goto done;
   return NULL;
 }
+
+done:
 
   gfc_free_expr (op1);
   gfc_free_expr (op2);
Index: gcc/testsuite/gfortran.dg/pr84734.f90
===
--- gcc/testsuite/gfortran.dg/pr84734.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr84734.f90	(working copy)
@@ -0,0 +1,4 @@
+! { dg-do compile }
+! PR fortran/84734
+   integer :: b(huge(1_8)+1_8) = 0 ! { dg-error "Arithmetic overflow" }
+   end


C++ PATCH for c++/84752, ICE with capture of constexpr array

2018-03-09 Thread Jason Merrill
Another case where we need to set a flag on the ck_identity conversion
so that we don't mistakenly pull out the constant value of something
that we want the address of.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 493898f505c4b38145356ef3b51dd986312497a8
Author: Jason Merrill 
Date:   Fri Mar 9 17:14:04 2018 -0500

PR c++/84752 - ICE with capture of constexpr array.

* call.c (standard_conversion): Set rvaluedness_matches_p on the
identity conversion under ck_lvalue.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 17cd1c4f63e..45c22aaa312 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -103,7 +103,7 @@ struct conversion {
  being bound to an rvalue expression.  If KIND is ck_rvalue,
  true when we are treating an lvalue as an rvalue (12.8p33).  If
  KIND is ck_base, always false.  If ck_identity, we will be
- binding a reference directly.  */
+ binding a reference directly or decaying to a pointer.  */
   BOOL_BITFIELD rvaluedness_matches_p: 1;
   BOOL_BITFIELD check_narrowing: 1;
   /* The type of the expression resulting from the conversion.  */
@@ -1139,6 +1139,8 @@ standard_conversion (tree to, tree from, tree expr, bool c_cast_p,
 {
   from = type_decays_to (from);
   fcode = TREE_CODE (from);
+  /* Tell convert_like_real that we're using the address.  */
+  conv->rvaluedness_matches_p = true;
   conv = build_conv (ck_lvalue, from, conv);
 }
   /* Wrapping a ck_rvalue around a class prvalue (as a result of using
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-array3.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-array3.C
new file mode 100644
index 000..09711033511
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-array3.C
@@ -0,0 +1,9 @@
+// PR c++/84752
+// { dg-do compile { target c++11 } }
+
+void foo()
+{
+  constexpr int x[1] = {};
+  []{ return (bool)x; };
+}
+	


C++ PATCH for c++/84785, ICE with alias template and default targs

2018-03-09 Thread Jason Merrill
If we are missing some earlier template arguments, we need to be
prepared for template parameters to remain in the result of
substitution.  In such a situation we need to make sure
processing_template_decl is set so things don't get confused by those
template parameters.

Tested x86_64-pc-linux-gnu, applying to trunk/7/6.
commit c7c6394b812ab2dcdace68d637c516ea3fb2a5f5
Author: Jason Merrill 
Date:   Fri Mar 9 17:40:39 2018 -0500

PR c++/84785 - ICE with alias template and default targs.

* pt.c (type_unification_real): Set processing_template_decl if
saw_undeduced == 1.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 89024c10fe2..2d7ce6062a7 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -19977,7 +19977,13 @@ type_unification_real (tree tparms,
 	  location_t save_loc = input_location;
 	  if (DECL_P (parm))
 	input_location = DECL_SOURCE_LOCATION (parm);
+
+	  if (saw_undeduced == 1)
+	++processing_template_decl;
 	  arg = tsubst_template_arg (arg, full_targs, fcomplain, NULL_TREE);
+	  if (saw_undeduced == 1)
+	--processing_template_decl;
+
 	  if (arg != error_mark_node && !uses_template_parms (arg))
 	arg = convert_template_argument (parm, arg, full_targs, complain,
 	 i, NULL_TREE);
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-63.C b/gcc/testsuite/g++.dg/cpp0x/alias-decl-63.C
new file mode 100644
index 000..04fb42d9e09
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-63.C
@@ -0,0 +1,18 @@
+// PR c++/84785
+// { dg-do compile { target c++11 } }
+
+template  struct A;
+template  struct B;
+template  using enable_if_t = typename B::type;
+template  using type_pack_element = int;
+struct variant {
+  variant() {}
+  template , enable_if_t>
+  variant(Arg &&);
+};
+
+struct S {
+  variant var;
+};
+int main() { S s; }


C++ PATCH for c++/84770, ICE with parameter pack and typedef

2018-03-09 Thread Jason Merrill
Now that argument packs don't have a type, the check for a non-type
argument with a typedefy type wasn't working for them.  So let's
recurse.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 7a163ab3c89382ad5c3bda2f345950760bc499c7
Author: Jason Merrill 
Date:   Fri Mar 9 21:56:18 2018 -0500

PR c++/84770 - ICE with typedef and parameter pack.

* pt.c (verify_unstripped_args_1): Split out from
verify_unstripped_args.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 2d7ce6062a7..c53f0bc23bb 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -1133,27 +1133,32 @@ optimize_specialization_lookup_p (tree tmpl)
 /* Make sure ARGS doesn't use any inappropriate typedefs; we should have
gone through coerce_template_parms by now.  */
 
+static void
+verify_unstripped_args_1 (tree inner)
+{
+  for (int i = 0; i < TREE_VEC_LENGTH (inner); ++i)
+{
+  tree arg = TREE_VEC_ELT (inner, i);
+  if (TREE_CODE (arg) == TEMPLATE_DECL)
+	/* OK */;
+  else if (TYPE_P (arg))
+	gcc_assert (strip_typedefs (arg, NULL) == arg);
+  else if (ARGUMENT_PACK_P (arg))
+	verify_unstripped_args_1 (ARGUMENT_PACK_ARGS (arg));
+  else if (strip_typedefs (TREE_TYPE (arg), NULL) != TREE_TYPE (arg))
+	/* Allow typedefs on the type of a non-type argument, since a
+	   parameter can have them.  */;
+  else
+	gcc_assert (strip_typedefs_expr (arg, NULL) == arg);
+}
+}
+
 static void
 verify_unstripped_args (tree args)
 {
   ++processing_template_decl;
   if (!any_dependent_template_arguments_p (args))
-{
-  tree inner = INNERMOST_TEMPLATE_ARGS (args);
-  for (int i = 0; i < TREE_VEC_LENGTH (inner); ++i)
-	{
-	  tree arg = TREE_VEC_ELT (inner, i);
-	  if (TREE_CODE (arg) == TEMPLATE_DECL)
-	/* OK */;
-	  else if (TYPE_P (arg))
-	gcc_assert (strip_typedefs (arg, NULL) == arg);
-	  else if (strip_typedefs (TREE_TYPE (arg), NULL) != TREE_TYPE (arg))
-	/* Allow typedefs on the type of a non-type argument, since a
-	   parameter can have them.  */;
-	  else
-	gcc_assert (strip_typedefs_expr (arg, NULL) == arg);
-	}
-}
+verify_unstripped_args_1 (INNERMOST_TEMPLATE_ARGS (args));
   --processing_template_decl;
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic173.C b/gcc/testsuite/g++.dg/cpp0x/variadic173.C
new file mode 100644
index 000..a0ca89b764f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic173.C
@@ -0,0 +1,10 @@
+// PR c++/84770
+// { dg-do compile { target c++11 } }
+
+typedef int T;
+
+template struct A {};
+
+int i;
+
+A a;


[committed] Fix va_arg gimplification on powerpc{,spe} (PR target/84772)

2018-03-09 Thread Jakub Jelinek
Hi!

The following is a powerpcspe variant of the sparc PR39645, and rs6000
has the same code (not sure if ever used or dead after powerpcspe removal).
When gimplifying the builtin, because va_arg_tmp.N is not marked as
addressable from the beginning, we call prepare_gimple_addressable which
assigns the previous value (uninitialized in this case) into the now
addressable variable and nothing optimizes it away.  Fixed by making it
addressable from the beginning.

Bootstrapped/regtested on {x86_64,i686,powerpc64{,le}}-linux
and tested with a cross to powerpcspe-linux on the testcase, committed as
obvious to trunk.

2018-03-09  Jakub Jelinek  

PR target/84772
* config/rs6000/rs6000.c (rs6000_gimplify_va_arg): Mark va_arg_tmp
temporary TREE_ADDRESSABLE before gimplification of BUILT_IN_MEMCPY.
* config/powerpcspe/powerpcspe.c (rs6000_gimplify_va_arg): Likewise.

* gcc.dg/pr84772.c: New test.

--- gcc/config/rs6000/rs6000.c.jj   2018-03-07 22:52:01.294479625 +0100
+++ gcc/config/rs6000/rs6000.c  2018-03-09 17:29:09.811949947 +0100
@@ -13537,6 +13537,7 @@ rs6000_gimplify_va_arg (tree valist, tre
 
   tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
   3, dest_addr, addr, size_int (rsize * 4));
+  TREE_ADDRESSABLE (tmp) = 1;
 
   gimplify_and_add (copy, pre_p);
   addr = dest_addr;
--- gcc/config/powerpcspe/powerpcspe.c.jj   2018-01-03 10:20:14.842537106 
+0100
+++ gcc/config/powerpcspe/powerpcspe.c  2018-03-09 17:29:35.291959951 +0100
@@ -14254,6 +14254,7 @@ rs6000_gimplify_va_arg (tree valist, tre
 
   tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
   3, dest_addr, addr, size_int (rsize * 4));
+  TREE_ADDRESSABLE (tmp) = 1;
 
   gimplify_and_add (copy, pre_p);
   addr = dest_addr;
--- gcc/testsuite/gcc.dg/pr84772.c.jj   2018-03-09 17:34:56.732749975 +0100
+++ gcc/testsuite/gcc.dg/pr84772.c  2018-03-09 17:33:05.095042322 +0100
@@ -0,0 +1,13 @@
+/* PR target/84772 */
+/* { dg-do compile } */
+/* { dg-options "-O -Wuninitialized" } */
+
+#include 
+
+void
+foo (int *x, int y, va_list ap)
+{
+  __builtin_memset (x, 0, sizeof (int));
+  for (int i = 0; i < y; i++)
+va_arg (ap, long double);  /* { dg-bogus "uninitialized" } 
*/  
+}

Jakub


[PATCH] Fix scoped enum debug info (PR debug/58150)

2018-03-09 Thread Jakub Jelinek
Hi!

As the following testcase shows, we emit bad debug info if a scoped
enum is used before the enumerators are defined.
gen_enumeration_type_die has support for enum forward declarations that
have NULL TYPE_SIZE (i.e. incomplete), but in this case it is complete,
just is ENUM_IS_OPAQUE and the enumerators aren't there.  We still set
TREE_ASM_WRITTEN on it and therefore don't do anything when it actually is
completed.

The following patch does change/fix a bunch of things:
1) I don't see the point of guarding the addition of DW_AT_declaration
to just -gdwarf-4 or -gno-strict-dwarf, that is already DWARF2 attribute and
we are emitting it for the forward declarations already
2) we don't set TREE_ASM_WRITTEN if ENUM_IS_OPAQUE
3) we don't try to do anything if it is ENUM_IS_OPAQUE that hasn't been
newly created; similarly to incomplete forward redeclarations, we should
have provided all that is needed on the first declaration
4) the addition of most attributes guarded with TYPE_SIZE is guarded by
(!original_type_die || !get_AT (type_die, DW_AT_...)); the
!original_type_die || part is just an optimization, so we don't try to query
it in the common cases where we are creating a fresh type die; anyway, the
reason for the guards is that we can now do the code twice for the same
type_die (on declaration and definition), and don't want to add the attributes
twice
5) not sure if ENUM_IS_OPAQUE must always imply non-NULL TYPE_SIZE, if not,
in that case we'd add the DW_AT_deprecated attribute twice

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-03-09  Jakub Jelinek  

PR debug/58150
* dwarf2out.c (gen_enumeration_type_die): Don't guard adding
DW_AT_declaration for ENUM_IS_OPAQUE on -gdwarf-4 or -gno-strict-dwarf,
but on TYPE_SIZE.  Don't do anything for ENUM_IS_OPAQUE if not creating
a new die.  Don't set TREE_ASM_WRITTEN if ENUM_IS_OPAQUE.  Guard
addition of most attributes on !orig_type_die or the attribute not
being present already.  Assert TYPE_VALUES is NULL for ENUM_IS_OPAQUE.

* g++.dg/debug/dwarf2/enum2.C: New test.

--- gcc/dwarf2out.c.jj  2018-03-08 22:49:41.965225512 +0100
+++ gcc/dwarf2out.c 2018-03-09 14:53:15.596115625 +0100
@@ -21839,6 +21839,7 @@ static dw_die_ref
 gen_enumeration_type_die (tree type, dw_die_ref context_die)
 {
   dw_die_ref type_die = lookup_type_die (type);
+  dw_die_ref orig_type_die = type_die;
 
   if (type_die == NULL)
 {
@@ -21846,20 +21847,18 @@ gen_enumeration_type_die (tree type, dw_
  scope_die_for (type, context_die), type);
   equate_type_number_to_die (type, type_die);
   add_name_attribute (type_die, type_tag (type));
-  if (dwarf_version >= 4 || !dwarf_strict)
-   {
- if (ENUM_IS_SCOPED (type))
-   add_AT_flag (type_die, DW_AT_enum_class, 1);
- if (ENUM_IS_OPAQUE (type))
-   add_AT_flag (type_die, DW_AT_declaration, 1);
-   }
+  if ((dwarf_version >= 4 || !dwarf_strict)
+ && ENUM_IS_SCOPED (type))
+   add_AT_flag (type_die, DW_AT_enum_class, 1);
+  if (ENUM_IS_OPAQUE (type) && TYPE_SIZE (type))
+   add_AT_flag (type_die, DW_AT_declaration, 1);
   if (!dwarf_strict)
add_AT_unsigned (type_die, DW_AT_encoding,
 TYPE_UNSIGNED (type)
 ? DW_ATE_unsigned
 : DW_ATE_signed);
 }
-  else if (! TYPE_SIZE (type))
+  else if (! TYPE_SIZE (type) || ENUM_IS_OPAQUE (type))
 return type_die;
   else
 remove_AT (type_die, DW_AT_declaration);
@@ -21871,10 +21870,14 @@ gen_enumeration_type_die (tree type, dw_
 {
   tree link;
 
-  TREE_ASM_WRITTEN (type) = 1;
-  add_byte_size_attribute (type_die, type);
-  add_alignment_attribute (type_die, type);
-  if (dwarf_version >= 3 || !dwarf_strict)
+  if (!ENUM_IS_OPAQUE (type))
+   TREE_ASM_WRITTEN (type) = 1;
+  if (!orig_type_die || !get_AT (type_die, DW_AT_byte_size))
+   add_byte_size_attribute (type_die, type);
+  if (!orig_type_die || !get_AT (type_die, DW_AT_alignment))
+   add_alignment_attribute (type_die, type);
+  if ((dwarf_version >= 3 || !dwarf_strict)
+ && (!orig_type_die || !get_AT (type_die, DW_AT_type)))
{
  tree underlying = lang_hooks.types.enum_underlying_base_type (type);
  add_type_attribute (type_die, underlying, TYPE_UNQUALIFIED, false,
@@ -21882,8 +21885,10 @@ gen_enumeration_type_die (tree type, dw_
}
   if (TYPE_STUB_DECL (type) != NULL_TREE)
{
- add_src_coords_attributes (type_die, TYPE_STUB_DECL (type));
- add_accessibility_attribute (type_die, TYPE_STUB_DECL (type));
+ if (!orig_type_die || !get_AT (type_die, DW_AT_decl_file))
+   add_src_coords_attributes (type_die, TYPE_STUB_DECL (type));
+ if (!orig_type_die || !get_AT (type_die, DW_AT_accessibility))
+   

Re: [PATCH, rs6000] Fix PR83969: ICE in final_scan_insn, at final.c:2997

2018-03-09 Thread Peter Bergner
On 3/9/18 1:31 PM, Segher Boessenkool wrote:
> On Wed, Mar 07, 2018 at 06:50:41PM -0600, Peter Bergner wrote:
>> This passed bootstrap and regtesting on powerpc64-linux, running the
>> testsuite in both 32-bit and 64-bit modes with no regressions.
>> Ok for trunk?
> 
> Sorry this took a while to review.  It is okay for trunk.  Does this
> need backports?

Technically, it is broken there too, but until trunk, we never really
generated the altivec mems that trigger this bug, so I think I would
lean towards just having this on trunk and if someone, somehow hits
it, then we can back port it then.

Peter



C++ PATCH for c++/84726, unnecessary lambda capture of constant var

2018-03-09 Thread Jason Merrill
wg21.link/p0588 changes i from not captured to captured by the lambda
in the testcase, but an implementation is allowed to optimize away
that capture if it doesn't affect the semantics of the program.  Since
we didn't capture it in previous versions of GCC, the optimization is
very desirable to avoid an ABI change.

Tested x86_64-pc-linux-gnu (against the Boost::hana testsuite as
well), applying to trunk.
commit 419de4066a17e67f1f0e30b957c8fdf27cce8c93
Author: Jason Merrill 
Date:   Tue Mar 6 17:25:34 2018 -0500

PR c++/84726 - unnecessary capture of constant vars.

* cp-tree.h (LAMBDA_CAPTURE_EXPLICIT_P)
(LAMBDA_EXPR_CAPTURE_OPTIMIZED): New.
* expr.c (mark_use): Set LAMBDA_EXPR_CAPTURE_OPTIMIZED.
* lambda.c (is_constant_capture_proxy)
(current_lambda_expr, var_to_maybe_prune, mark_const_cap_r)
(prune_lambda_captures): New.
(finish_lambda_function): Call prune_lambda_captures.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index bf9897c9980..1ea5dc4248c 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -352,6 +352,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   TEMPLATE_PARM_PARAMETER_PACK (in TEMPLATE_PARM_INDEX)
   ATTR_IS_DEPENDENT (in the TREE_LIST for an attribute)
   ABI_TAG_IMPLICIT (in the TREE_LIST for the argument of abi_tag)
+  LAMBDA_CAPTURE_EXPLICIT_P (in a TREE_LIST in LAMBDA_EXPR_CAPTURE_LIST)
   CONSTRUCTOR_IS_DIRECT_INIT (in CONSTRUCTOR)
   LAMBDA_EXPR_CAPTURES_THIS_P (in LAMBDA_EXPR)
   DECLTYPE_FOR_LAMBDA_CAPTURE (in DECLTYPE_TYPE)
@@ -403,6 +404,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
   CONSTRUCTOR_MUTABLE_POISON (in CONSTRUCTOR)
   OVL_HIDDEN_P (in OVERLOAD)
   SWITCH_STMT_NO_BREAK_P (in SWITCH_STMT)
+  LAMBDA_EXPR_CAPTURE_OPTIMIZED (in LAMBDA_EXPR)
3: (TREE_REFERENCE_EXPR) (in NON_LVALUE_EXPR) (commented-out).
   ICS_BAD_FLAG (in _CONV)
   FN_TRY_BLOCK_P (in TRY_BLOCK)
@@ -1258,6 +1260,15 @@ enum cp_lambda_default_capture_mode_type {
 #define LAMBDA_EXPR_MUTABLE_P(NODE) \
   TREE_LANG_FLAG_1 (LAMBDA_EXPR_CHECK (NODE))
 
+/* True iff uses of a const variable capture were optimized away.  */
+#define LAMBDA_EXPR_CAPTURE_OPTIMIZED(NODE) \
+  TREE_LANG_FLAG_2 (LAMBDA_EXPR_CHECK (NODE))
+
+/* True if this TREE_LIST in LAMBDA_EXPR_CAPTURE_LIST is for an explicit
+   capture.  */
+#define LAMBDA_CAPTURE_EXPLICIT_P(NODE) \
+  TREE_LANG_FLAG_0 (TREE_LIST_CHECK (NODE))
+
 /* The source location of the lambda.  */
 #define LAMBDA_EXPR_LOCATION(NODE) \
   (((struct tree_lambda_expr *)LAMBDA_EXPR_CHECK (NODE))->locus)
@@ -6895,6 +6906,7 @@ extern void insert_capture_proxy		(tree);
 extern void insert_pending_capture_proxies	(void);
 extern bool is_capture_proxy			(tree);
 extern bool is_normal_capture_proxy (tree);
+extern bool is_constant_capture_proxy   (tree);
 extern void register_capture_members		(tree);
 extern tree lambda_expr_this_capture(tree, bool);
 extern void maybe_generic_this_capture		(tree, tree);
@@ -6902,6 +6914,7 @@ extern tree maybe_resolve_dummy			(tree, bool);
 extern tree current_nonlambda_function		(void);
 extern tree nonlambda_method_basetype		(void);
 extern tree current_nonlambda_scope		(void);
+extern tree current_lambda_expr			(void);
 extern bool generic_lambda_fn_p			(tree);
 extern tree do_dependent_capture		(tree, bool = false);
 extern bool lambda_fn_in_template_p		(tree);
diff --git a/gcc/cp/expr.c b/gcc/cp/expr.c
index 2e679868970..15894fc0b59 100644
--- a/gcc/cp/expr.c
+++ b/gcc/cp/expr.c
@@ -117,7 +117,15 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
 	  tree cap = DECL_CAPTURED_VARIABLE (expr);
 	  if (TREE_CODE (TREE_TYPE (cap)) == TREE_CODE (TREE_TYPE (expr))
 	  && decl_constant_var_p (cap))
-	return RECUR (cap);
+	{
+	  tree val = RECUR (cap);
+	  if (!is_capture_proxy (val))
+		{
+		  tree l = current_lambda_expr ();
+		  LAMBDA_EXPR_CAPTURE_OPTIMIZED (l) = true;
+		}
+	  return val;
+	}
 	}
   if (outer_automatic_var_p (expr)
 	  && decl_constant_var_p (expr))
@@ -160,7 +168,15 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
 	  tree cap = DECL_CAPTURED_VARIABLE (ref);
 	  if (TREE_CODE (TREE_TYPE (cap)) != REFERENCE_TYPE
 		  && decl_constant_var_p (cap))
-		return RECUR (cap);
+		{
+		  tree val = RECUR (cap);
+		  if (!is_capture_proxy (val))
+		{
+		  tree l = current_lambda_expr ();
+		  LAMBDA_EXPR_CAPTURE_OPTIMIZED (l) = true;
+		}
+		  return val;
+		}
 	}
 	  tree r = mark_rvalue_use (ref, loc, reject_builtin);
 	  if (r != ref)
diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 094979e81a3..de064ffc85e 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -291,6 +291,17 @@ is_normal_capture_proxy (tree decl)
   return DECL_NORMAL_CAPTURE_P (val);
 }
 
+/* Returns true iff DECL is a capture proxy for a normal capture
+   of a constant 

Re: [RFA][PATCH][PR middle-end/61118] Improve tree CFG accuracy for setjmp/longjmp

2018-03-09 Thread Richard Biener
On March 9, 2018 8:42:16 PM GMT+01:00, Jeff Law  wrote:
>On 03/08/2018 06:22 AM, Richard Biener wrote:
>> On Thu, Mar 8, 2018 at 1:54 PM, Michael Matz  wrote:
>>> Hi,
>>>
>>> On Wed, 7 Mar 2018, Peter Bergner wrote:
>>>
 On 3/7/18 12:01 AM, Jeff Law wrote:
> I believe so by nature that the setjmp dominates the longjmp sites
>and
> thus also dominates the dispatcher.  But it's something I want to
> explicitly check before resubmitting.

 Are we sure a setjmp has to dominate its longjmp sites?
>>>
>>> No, they don't have to dominate.  For lack of better term I used
>something
>>> like "ordered after" in my mails :)
>>>
 Couldn't you have something like:
>>>
>>> Yeah, exactly.
>> 
>> So given all the discussion _iff_ we want to change the CFG we
>generate then
>> let's invent a general __builtin_receiver () and lower setjmp to
>And I'm seriously thinking we may want to hold off on the fix for 61118
>for gcc-8.  We still might want to fix 21161 though.  I still need to
>digest all the discussion.
>
>
>> 
>>   setjmp ();
>>   res = __builtin_receiver ();
>> 
>> and construct a CFG around that.  Remember that IIRC I added the
>abnormal
>> edges to and from setjmp to inhibit code-motion across it so whatever
>CFG
>> we'll end up with should ensure that there can't be any code-motion
>optimization
>> across the above pair nor "inbetween" it.  The straight-forward
>> CFG of, apart from the fallthru from setjmp to the receiver, a
>abnormal edge
>> to the dispatcher from setjmp and an abnormal edge from the
>dispatcher to
>> the receiver would do that trick I think.
>> 
>> I'd rather not do that for GCC 8 though.  So to fix the warning can't
>we do
>> sth else "good" and move the strange warning code from RTL to GIMPLE?
>Someone mentioned that possibility in one of the related BZs.  The
>concern was the factoring of the handler could really hinder good
>dataflow analysis.  We could end up making things worse :(
>
>
>> 
>> Or re-do the warning?  Since in the other thread about setjmp
>side-effects
>> we concluded that setjmp has to preserve all call-saved regs?  I
>don't see
>> that reflected in regno_clobbered_at_setjmp or its caller -- that is,
>> we should only warn for call clobbered and thus caller-saved regs
>because
>> normal return may clobber the spilled values.
>Possibly.  It's clear from the discussion and multitude of BZs that
>this
>is complex and easily goof'd.
>
>I believe part of the "trick" here is that once we compute (in RTL) the
>set of objects live across the setjmp the allocators then refuse to
>allocate those values into call-saved registers (hence the other
>discussion thread) with Peter and co.  Of course the RTL analysis get
>this wrong in a roughly similar manner (21161).
>
>
>> Not sure if the PR testcase is amongst the cases fixed by such
>change.
>Unclear -- it'd likely depend on where we do the analysis.  It's
>certainly the case that for 61118 that if the analysis happens in RTL
>and we haven't addressed our CFG correctness issues that we're going to
>fail.

Note that on RTL the CFG does not have any abnormal edges for setjmp and thus 
code is freely moved across it. Maybe addressing that would also help. 

Richard. 

>
>jeff.



Re: [C++ Patch] PR 71169 ("[7/8 Regression] ICE on invalid C++ code in pop_nested_class"), PR 71832 and more (Take 2)

2018-03-09 Thread Paolo Carlini

Hi,

On 09/03/2018 17:11, Jason Merrill wrote:

Hmm, actually I think any_erroneous_template_args will return false
for error_mark_node.  Just moving the error_mark_node check up should
correct that.  OK with that change.
Ah, thanks for spotting that: it wasn't clear to me that TI_ARGS cannot 
be error_mark_node neither that skipping the bodies when the type itself 
is error_mark_node cannot hurt. I'm finishing testing the below and will 
apply it if everything goes well.


Thanks!
Paolo.

/

Index: cp/cp-tree.h
===
--- cp/cp-tree.h(revision 258391)
+++ cp/cp-tree.h(working copy)
@@ -6558,6 +6558,7 @@ extern int processing_template_parmlist;
 extern bool dependent_type_p   (tree);
 extern bool dependent_scope_p  (tree);
 extern bool any_dependent_template_arguments_p  (const_tree);
+extern bool any_erroneous_template_args_p   (const_tree);
 extern bool dependent_template_p   (tree);
 extern bool dependent_template_id_p(tree, tree);
 extern bool type_dependent_expression_p(tree);
Index: cp/parser.c
===
--- cp/parser.c (revision 258391)
+++ cp/parser.c (working copy)
@@ -22669,6 +22669,16 @@ cp_parser_class_specifier_1 (cp_parser* parser)
   cp_default_arg_entry *e;
   tree save_ccp, save_ccr;
 
+  if (any_erroneous_template_args_p (type))
+   {
+ /* Skip default arguments, NSDMIs, etc, in order to improve
+error recovery (c++/71169, c++/71832).  */
+ vec_safe_truncate (unparsed_funs_with_default_args, 0);
+ vec_safe_truncate (unparsed_nsdmis, 0);
+ vec_safe_truncate (unparsed_classes, 0);
+ vec_safe_truncate (unparsed_funs_with_definitions, 0);
+   }
+
   /* In a first pass, parse default arguments to the functions.
 Then, in a second pass, parse the bodies of the functions.
 This two-phased approach handles cases like:
Index: cp/pt.c
===
--- cp/pt.c (revision 258391)
+++ cp/pt.c (working copy)
@@ -25048,6 +25048,39 @@ any_dependent_template_arguments_p (const_tree arg
   return false;
 }
 
+/* Returns true if ARGS contains any errors.  */
+
+bool
+any_erroneous_template_args_p (const_tree args)
+{
+  int i;
+  int j;
+
+  if (args == error_mark_node)
+return true;
+
+  if (args && TREE_CODE (args) != TREE_VEC)
+{
+  if (tree ti = get_template_info (args))
+   args = TI_ARGS (ti);
+  else
+   args = NULL_TREE;
+}
+
+  if (!args)
+return false;
+
+  for (i = 0; i < TMPL_ARGS_DEPTH (args); ++i)
+{
+  const_tree level = TMPL_ARGS_LEVEL (args, i + 1);
+  for (j = 0; j < TREE_VEC_LENGTH (level); ++j)
+   if (error_operand_p (TREE_VEC_ELT (level, j)))
+ return true;
+}
+
+  return false;
+}
+
 /* Returns TRUE if the template TMPL is type-dependent.  */
 
 bool
Index: testsuite/g++.dg/cpp0x/pr71169-2.C
===
--- testsuite/g++.dg/cpp0x/pr71169-2.C  (nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71169-2.C  (working copy)
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++11 } }
+
+template  class A {  // { dg-error "declared" }
+  template  void m_fn1() {
+m_fn1();
+}
+};
+
+template
+struct B
+{
+  int f(int = 0) { return 0; }
+};
+
+int main()
+{
+  B b;
+  return b.f();
+}
Index: testsuite/g++.dg/cpp0x/pr71169.C
===
--- testsuite/g++.dg/cpp0x/pr71169.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71169.C(working copy)
@@ -0,0 +1,7 @@
+// { dg-do compile { target c++11 } }
+
+template  class A {  // { dg-error "declared" }
+  template  void m_fn1() {
+m_fn1();
+}
+};
Index: testsuite/g++.dg/cpp0x/pr71832.C
===
--- testsuite/g++.dg/cpp0x/pr71832.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71832.C(working copy)
@@ -0,0 +1,7 @@
+// { dg-do compile { target c++11 } }
+
+template < typename decltype (0) > struct A  // { dg-error "expected|two or 
more" }
+{ 
+  void foo () { baz (); }
+  template < typename ... S > void baz () {}
+};


Re: [RFA][PATCH][PR middle-end/61118] Improve tree CFG accuracy for setjmp/longjmp

2018-03-09 Thread Jeff Law
On 03/07/2018 05:04 PM, Peter Bergner wrote:
> On 3/7/18 12:01 AM, Jeff Law wrote:
>> I believe so by nature that the setjmp dominates the longjmp sites and
>> thus also dominates the dispatcher.  But it's something I want to
>> explicitly check before resubmitting.
> 
> Are we sure a setjmp has to dominate its longjmp sites?  Couldn't you
> have something like:
> 
> bb(a): bb(b):
>   ......
>   setjmp (env)   setjmp (env)
>   \ /
>\   /
> \ /
>  \   /
>   \ /
>\   /
> bb(c):
>   ...
>   longjmp (env)
> 
> ...or:
> 
> bb(a):
>   ...
>   setjmp (env)
>   |\
>   | \
>   |  \
>   |   \
>   |   bb(b):
>   | ...
>   | setjmp (env)
>   |   /
>   |  /
>   | /
>   v
> bb(c):
>   ...
>   longjmp (env)
> 
> If so, then the setjmp calls might not dominate the longjmp call.
Right.  This is one of the cases that needs investigation WRT the value
that would flow into the PHI from the dispatcher.

Jeff


Re: [RFA][PATCH][PR middle-end/61118] Improve tree CFG accuracy for setjmp/longjmp

2018-03-09 Thread Jeff Law
On 03/08/2018 06:22 AM, Richard Biener wrote:
> On Thu, Mar 8, 2018 at 1:54 PM, Michael Matz  wrote:
>> Hi,
>>
>> On Wed, 7 Mar 2018, Peter Bergner wrote:
>>
>>> On 3/7/18 12:01 AM, Jeff Law wrote:
 I believe so by nature that the setjmp dominates the longjmp sites and
 thus also dominates the dispatcher.  But it's something I want to
 explicitly check before resubmitting.
>>>
>>> Are we sure a setjmp has to dominate its longjmp sites?
>>
>> No, they don't have to dominate.  For lack of better term I used something
>> like "ordered after" in my mails :)
>>
>>> Couldn't you have something like:
>>
>> Yeah, exactly.
> 
> So given all the discussion _iff_ we want to change the CFG we generate then
> let's invent a general __builtin_receiver () and lower setjmp to
And I'm seriously thinking we may want to hold off on the fix for 61118
for gcc-8.  We still might want to fix 21161 though.  I still need to
digest all the discussion.


> 
>   setjmp ();
>   res = __builtin_receiver ();
> 
> and construct a CFG around that.  Remember that IIRC I added the abnormal
> edges to and from setjmp to inhibit code-motion across it so whatever CFG
> we'll end up with should ensure that there can't be any code-motion 
> optimization
> across the above pair nor "inbetween" it.  The straight-forward
> CFG of, apart from the fallthru from setjmp to the receiver, a abnormal edge
> to the dispatcher from setjmp and an abnormal edge from the dispatcher to
> the receiver would do that trick I think.
> 
> I'd rather not do that for GCC 8 though.  So to fix the warning can't we do
> sth else "good" and move the strange warning code from RTL to GIMPLE?
Someone mentioned that possibility in one of the related BZs.  The
concern was the factoring of the handler could really hinder good
dataflow analysis.  We could end up making things worse :(


> 
> Or re-do the warning?  Since in the other thread about setjmp side-effects
> we concluded that setjmp has to preserve all call-saved regs?  I don't see
> that reflected in regno_clobbered_at_setjmp or its caller -- that is,
> we should only warn for call clobbered and thus caller-saved regs because
> normal return may clobber the spilled values.
Possibly.  It's clear from the discussion and multitude of BZs that this
is complex and easily goof'd.

I believe part of the "trick" here is that once we compute (in RTL) the
set of objects live across the setjmp the allocators then refuse to
allocate those values into call-saved registers (hence the other
discussion thread) with Peter and co.  Of course the RTL analysis get
this wrong in a roughly similar manner (21161).


> Not sure if the PR testcase is amongst the cases fixed by such change.
Unclear -- it'd likely depend on where we do the analysis.  It's
certainly the case that for 61118 that if the analysis happens in RTL
and we haven't addressed our CFG correctness issues that we're going to
fail.

jeff.



Re: [PATCH, rs6000] Fix PR83969: ICE in final_scan_insn, at final.c:2997

2018-03-09 Thread Segher Boessenkool
On Wed, Mar 07, 2018 at 06:50:41PM -0600, Peter Bergner wrote:
> PR83969 shows another bug in mem_operand_gpr() (which implements the "Y"
> constraint) accepting reg+reg addresses.  This was fixed by adding a call
> to rs6000_offsettable_memref_p() to verify the address is a valid offsettable
> address.  Fixing that exposed a problem in the *movdi_internal64 pattern
> which was using the "Y" constraint for the integer load/store alternatives,
> which allow either reg+offset or reg+reg addresses.  This worked before
> only because of the buggy mem_operand_gpr.  The fix for that was to use
> the "YZ" constraint which allows both reg+offset and reg+reg addresses.
> 
> This passed bootstrap and regtesting on powerpc64-linux, running the
> testsuite in both 32-bit and 64-bit modes with no regressions.
> Ok for trunk?

Sorry this took a while to review.  It is okay for trunk.  Does this
need backports?

Thanks!


Segher


> gcc/
>     PR target/83969
>     * config/rs6000/rs6000.c (rs6000_offsettable_memref_p): New prototype.
>     Add strict argument and use it.
>     (rs6000_split_multireg_move): Update for new strict argument.
>     (mem_operand_gpr): Disallow all non-offsettable addresses.
>     * config/rs6000/rs6000.md (*movdi_internal64): Use YZ constraint.
> 
> gcc/testsuite/
>     PR target/83969
>     * gcc.target/powerpc/pr83969.c: New test.


Re: [PATCH] avoid warning for memcpy to self (PR 83456)

2018-03-09 Thread Jeff Law
On 03/08/2018 05:46 PM, Martin Sebor wrote:
> On 03/08/2018 01:59 AM, Richard Biener wrote:
>> On Thu, Mar 8, 2018 at 12:01 AM, Martin Sebor  wrote:
>>> I have become convinced that issuing -Wrestrict in gimple-fold
>>> for calls to memcpy() where the source pointer is the same as
>>> the destination causes more trouble than it's worth, especially
>>> when inlining is involved, as in:
>>>
>>>   inline void bar (void *d, void *s, unsigned N)
>>>   {
>>> if (s != d)
>>>   memcpy (d, s, N);
>>>   }
>>>
>>>   void foo (void* src)
>>>   {
>>> bar (src, src, 1);
>>>   }
>>>
>>> It seems that there should be a way to teach GCC to avoid
>>> folding statements in dead blocks (e.g., in a block controlled
>>> by 'if (0 != 0)' as the one below), and that it might even speed
>>> up compilation, but in the meantime it leads to false positive
>>> -Wrestrict warnings.
>>>
>>> The attached patch removes this instance of the warning and
>>> adjusts tests not to expect it.
>>
>> Ok.
> 
> I'm sorry, I just discovered that simply removing the code from
> gimple-fold would cause a regression with respect to GCC 7.0
> which does diagnose memcpy(p, p, N) calls, and the diagnostic
> is useful to users (pr60256 and pr65452 request one for all
> string functions, although for reasons other than to detect
> overlapping accesses).
> 
> To maintain the warning and at the same time eliminate the pesky
> false positives from gimple-fold I've partially restored the
> detection in the front-end.  That in turn exposed some missing
> checking of the no-warning bit, leading to duplicate warnings.
> I've fixed all that up in the attached slightly bigger patch.
> 
> Martin
> 
> PS Even with the change to the front end above, removing
> the memcpy warning from gimple-fold has the downside of
> failing to detect bugs like:
> 
>   struct S { char a[32]; } s;
>   memcpy (, s.a, sizeof s.a);
> 
> because the front end doesn't know that  and s.a are at
> the same address (copying between distinct members of the
> same union causes another false negative).  I want to
> revisit this in stage 1 and see about handling it.  If
> you have suggestions/preference for how or where I'd
> appreciate your input.
I suspect the too-early folding is going to get in the way here.  In
some ways what you want is to either warn early or have a way to recover
the original arguments.  I'm not aware of a good way to do either for
this scenario though.


> 
> gcc-83456.diff
> 
> 
> PR tree-optimization/83456 - -Wrestrict false positive on a non-overlapping 
> memcpy in an inline function
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/83456
>   * gimple-fold.c (gimple_fold_builtin_memory_op): Avoid warning
>   for perfectly overlapping calls to memcpy.
>   (gimple_fold_builtin_memory_chk): Same.
>   (gimple_fold_builtin_strcpy): Handle no-warning.
>   (gimple_fold_builtin_stxcpy_chk): Same.
>   * tree-ssa-strlen.c (maybe_diag_stxncpy_trunc): Handle no-warning.
> 
> gcc/c-family/ChangeLog:
> 
>   PR tree-optimization/83456
>   * gcc/c-family/c-common.c (check_function_restrict): Return bool.
>   Restore checking of bounded built-in functions.
>   (check_function_arguments): Also return the result
>   of warn_for_restrict.
>   * gcc/c-family/c-common.c (check_function_restrict): Return bool.
>   * gcc/c-family/c-warn.c (warn_for_restrict): Return bool.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/83456
>   * c-c++-common/Wrestrict-2.c: Remove test cases.
>   * c-c++-common/Wrestrict.c: Same.
>   * gcc.dg/Wrestrict-12.c: New test.
>   * gcc.dg/Wrestrict-14.c: New test.
The self-copy via memcpy is particularly annoying.   In an ideal world
we'd never generate them, but we often do and it's been a source of many
complaints.

Ideally we'd never generate them.  Second best option is to fold them
away -- at least those generated by the compiler itself.

Anyway, this is OK for the trunk.  Feel free to open BZs for issues you
want to tackle during gcc-9.

Thanks,
jeff



Re: [PATCH] Fix tree-inline.c INDIRECT_REF handling (PR c++/84767)

2018-03-09 Thread Jeff Law
On 03/09/2018 11:06 AM, Jakub Jelinek wrote:
> Hi!
> 
> On the following testcase we ICE because when cloning the ctor using
> tree-inline.c infrastructure we don't remap the type of INDIRECT_REF,
> which needs to be remapped if it a variable length type, otherwise we
> refer to the parameters of the original ctor rather than of the base
> or complete ctor.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?
> 
> 2018-03-09  Jakub Jelinek  
> 
>   PR c++/84767
>   * tree-inline.c (copy_tree_body_r): For INDIRECT_REF of a remapped
>   decl, use remap_type if we want to use the type.
> 
>   * g++.dg/ext/vla18.C: New test.
OK.
jeff


Re: [PATCH] fix ICE in generic_overlap (PR 84526)

2018-03-09 Thread Jeff Law
On 03/07/2018 03:52 PM, Martin Sebor wrote:
> On 03/07/2018 12:37 PM, Jeff Law wrote:
>> On 02/26/2018 10:32 AM, Martin Sebor wrote:
>>>
>>> PR tree-optimization/84526 - ICE in generic_overlap at 
>>> gcc/gimple-ssa-warn-restrict.c:927 since r257860
>>>
>>>
>>> gcc/ChangeLog:
>>>
>>> PR tree-optimization/84526
>>> * gimple-ssa-warn-restrict.c (builtin_memref::set_base_and_offset):
>>> Remove dead code.
>>> (builtin_access::generic_overlap): Be prepared to handle non-array
>>> base objects.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> PR tree-optimization/84526
>>> * gcc.dg/Wrestrict-10.c: New test.
>>>
>>> Index: gcc/gimple-ssa-warn-restrict.c
>>> ===
>>> --- gcc/gimple-ssa-warn-restrict.c(revision 257963)
>>> +++ gcc/gimple-ssa-warn-restrict.c(working copy)
>>> @@ -396,6 +396,9 @@ builtin_memref::set_base_and_offset (tree expr)
>>>    if (TREE_CODE (expr) == ADDR_EXPR)
>>>  expr = TREE_OPERAND (expr, 0);
>>>
>>> +  /* Stash the reference for offset validation.  */
>>> +  ref = expr;
>>> +
>>>    poly_int64 bitsize, bitpos;
>>>    tree var_off;
>>>    machine_mode mode;
>>> @@ -409,23 +412,33 @@ builtin_memref::set_base_and_offset (tree expr)
>>>    base = get_inner_reference (expr, , , _off,
>>>    , , , );
>>>
>>> +  /* get_inner_reference is not expected to return null.  */
>>> +  gcc_assert (base != NULL);
>>> +
>>>    poly_int64 bytepos = exact_div (bitpos, BITS_PER_UNIT);
>>>
>>> -  HOST_WIDE_INT const_off;
>>> -  if (!base || !bytepos.is_constant (_off))
>>> +  /* Convert the poly_int64 offset to to offset_int.  The offset
>>> + should be constant but be prepared for it not to be just in
>>> + case.  */
>>> +  offset_int cstoff;
>>> +  if (bytepos.is_constant ())
>>>  {
>>> -  base = get_base_address (TREE_OPERAND (expr, 0));
>>> -  return;
>>> +  offrange[0] += cstoff;
>>> +  offrange[1] += cstoff;
>>> +
>>> +  /* Besides the reference saved above, also stash the offset
>>> + for validation.  */
>>> +  if (TREE_CODE (expr) == COMPONENT_REF)
>>> +refoff = cstoff;
>>>  }
>>> +  else
>>> +offrange[1] += maxobjsize;
>> So is this assignment to offrange[1] really correct?
>>
>> ISTM that it potentially overflows (relative to MAXOBJSIZE) and that
>> you'd likely be better off just assigning offrange[1] to MAXOBJSIZE.
>>
>> Or alternately signal to the callers that we can't really analyze the
>> memory address and inhibit further analysis of the potential overlap of
>> the objects.
> 
> The offsets are stored in offset_int and there is code to deal
> with values outside the [-MAXOBJSIZE, MAXOBJSIZE] range, either
> by capping them to the bounds of the array/object being accessed
> if it's known, or to the MAXOBJSIZE range.  There are a number of
> places where offsets with unknown values are being added together
> and where their sum might end up exceeding that range (e.g., when
> dealing with multiple offsets, each in some range) but as long
> as the offsets are only added and not multiplied they should never
> overflow offset_int.  It would be okay to assign MAXOBJSIZE here
> instead of adding it, but there are other places with the same
> addition (see the bottom of builtin_memref::extend_offset_range)
> so doing it here alone would be inconsistent.
OK.  Thanks for explaining.

> 
>>> @@ -918,12 +924,18 @@ builtin_access::generic_overlap ()
>>>    if (!overlap_certain)
>>>  {
>>>    if (!dstref->strbounded_p && !depends_p)
>>> +/* Memcpy only considers certain overlap.  */
>>>  return false;
>>>
>>>    /* There's no way to distinguish an access to the same member
>>>   of a structure from one to two distinct members of the same
>>>   structure.  Give up to avoid excessive false positives.  */
>>> -  tree basetype = TREE_TYPE (TREE_TYPE (dstref->base));
>>> +  tree basetype = TREE_TYPE (dstref->base);
>>> +
>>> +  if (POINTER_TYPE_P (basetype)
>>> +  || TREE_CODE (basetype) == ARRAY_TYPE)
>>> +basetype = TREE_TYPE (basetype);
>>> +
>>>    if (RECORD_OR_UNION_TYPE_P (basetype))
>>>  return false;
>>>  }
>> This is probably fairly reasonable.  My only concern would be that we
>> handle multi-dimensional arrays correctly.  Do you need to handle
>> ARRAY_TYPEs with a loop?
> 
> Yes, good catch, thanks!  It's unrelated to this bug (the ICE) but
> it seems simple enough to handle at the same time so I've added
> the loop and a test to verify it works as expected.
> 
>> I do have a more general concern.  Given that we walk backwards through
>> pointer casts and ADDR_EXPRs we potentially lose information.  For
>> example we might walk backwards through a cast from a small array to a
>> larger array type.
Agreed.  Thanks for fixing this as well.  Agreed it seems simple enough
to go ahead and handle now.


>>
>> Thus later we use the smaller array type when computing the bounds and
>> 

Re: [PR84682] disregard address constraints on non-addresses

2018-03-09 Thread Jeff Law
On 03/08/2018 11:45 PM, Alexandre Oliva wrote:
> LRA gets very confused when non-addresses are passed as operands to
> asms with address contraints.  Even if other constraints are
> available, and the operand is a perfect fit for them, we'd still
> attempt to process the operand as an address, and fail miserably at
> that.
> 
> Truth is, address constraints expect operands allowed by
> address_operand, and we make sure this is the case throughout the
> compiler, even in asm statements.  The problem was that, if multiple
> constraints were available, we wouldn't insist that the operand be
> allowed by address_operand, but we would proceed as if it was,
> regardless of any other constraints.
> 
> To address this problem, I've arranged for LRA to attempt to deal with
> address-constrained operands as addresses only when the is_address
> flag is set, and to not set this flag in preprocess_constraints for
> asm operands that are not allowed by address_operand.
> 
> Regstrapped on i686- and x86_64-linux-gnu.  Ok to install?
> 
> for  gcc/ChangeLog
> 
>   PR rtl-optimization/84682
>   * lra-constraints.c (process_address_1): Check is_address flag
>   for address constraints.
>   (process_alt_operands): Likewise.
>   * lra.c (lra_set_insn_recog_data): Pass asm operand locs to
>   preprocess_constraints.
>   * recog.h (preprocess_constraints): Add oploc parameter.
>   Adjust callers.
>   * recog.c (preprocess_constraints): Test address_operand for
>   CT_ADDRESS constraints.
> 
> for  gcc/testsuite/ChangeLog
> 
>   PR rtl-optimization/84682
>   * gcc.dg/torture/pr84682-1.c: New.
>   * gcc.dg/torture/pr84682-2.c: New.
>   * gcc.dg/torture/pr84682-3.c: New.
I went ahead and committed this to the trunk.

jeff


[PATCH] Fix tree-inline.c INDIRECT_REF handling (PR c++/84767)

2018-03-09 Thread Jakub Jelinek
Hi!

On the following testcase we ICE because when cloning the ctor using
tree-inline.c infrastructure we don't remap the type of INDIRECT_REF,
which needs to be remapped if it a variable length type, otherwise we
refer to the parameters of the original ctor rather than of the base
or complete ctor.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2018-03-09  Jakub Jelinek  

PR c++/84767
* tree-inline.c (copy_tree_body_r): For INDIRECT_REF of a remapped
decl, use remap_type if we want to use the type.

* g++.dg/ext/vla18.C: New test.

--- gcc/tree-inline.c.jj2018-02-13 21:21:38.0 +0100
+++ gcc/tree-inline.c   2018-03-09 13:05:10.355154860 +0100
@@ -1192,6 +1192,7 @@ copy_tree_body_r (tree *tp, int *walk_su
  *tp = gimple_fold_indirect_ref (ptr);
  if (! *tp)
{
+ type = remap_type (type, id);
  if (TREE_CODE (ptr) == ADDR_EXPR)
{
  *tp
--- gcc/testsuite/g++.dg/ext/vla18.C.jj 2018-03-09 13:11:27.863229118 +0100
+++ gcc/testsuite/g++.dg/ext/vla18.C2018-03-09 13:10:54.319222521 +0100
@@ -0,0 +1,19 @@
+// PR c++/84767
+// { dg-do compile }
+// { dg-options "" }
+
+int v[1][10];
+
+struct A
+{
+  A (int);
+};
+
+A::A (int i)
+{
+  typedef int T[1][i];
+  T *x = (T *) v;
+  (*x)[0][0] = 0;
+}
+
+A a = 10;

Jakub


libgo patch committed: Add internal/trace to noinst_DATA

2018-03-09 Thread Ian Lance Taylor
This libgo patch adds internal/trace to noinst_DATA.  The
internal/trace package is only imported by tests (specifically the
tests in runtime/trace) so it must be in noinst_DATA to ensure that it
is built before running the tests.  This was mostly working because
internal/trace has tests itself, and is listed in check-packages.txt
before runtime/trace, so typical invocations of make would build
internal/trace for checking purposes before checking runtime/trace.
But we need this change to make that reliable.  Bootstrapped on
x86_64-pc-linux-gnu.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 258337)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-112623c89ee42b42bc748f12d9c704615634501b
+ce28919112dbb234366816ab39ce060ad45e8ca9
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/Makefile.am
===
--- libgo/Makefile.am   (revision 257914)
+++ libgo/Makefile.am   (working copy)
@@ -397,6 +397,7 @@ noinst_DATA = \
golang_org/x/net/internal/nettest.gox \
golang_org/x/net/nettest.gox \
internal/testenv.gox \
+   internal/trace.gox \
net/internal/socktest.gox \
os/signal/internal/pty.gox
 


Re: [C++ PATCH] For -Wformat purposes use ellipsis args before class to pointer to class conversion (PR c++/84076)

2018-03-09 Thread Jakub Jelinek
On Fri, Mar 09, 2018 at 11:28:47AM -0500, Jason Merrill wrote:
> On Thu, Mar 8, 2018 at 4:26 PM, Jakub Jelinek  wrote:
> > On Thu, Mar 08, 2018 at 04:20:40PM -0500, Jason Merrill wrote:
> >> On Thu, Mar 8, 2018 at 1:08 PM, Jakub Jelinek  wrote:
> >> > The reporter complains that -Wformat incorrectly reports std::string* 
> >> > passed
> >> > to "%s" format rather than std::string, which is what the user did.
> >> >
> >> > This transformation of non-trivial copy init or dtor classes in ellipsis 
> >> > is
> >> > done by convert_arg_to_ellipsis; that function does many changes and all
> >> > but this one look desirable for the 
> >> > -Wnonnull/-Wformat/-Wsentinel/-Wrestrict
> >> > warnings.  We prepare a special argument vector in any case, so this 
> >> > patch
> >> > just arranges to undo what convert_arg_to_ellipsis did for the classes.
> >> > I think -Wnonnull shouldn't care, because when passing such a class by
> >> > value, it will be non-NULL (and -Wnonnull looks only for literal NULLs
> >> > anyway), -Wrestrict only cares about named arguments and -Wsentinel only
> >> > cares about NULL arguments passed to ellipsis.
> >>
> >> I notice that convert_arg_to_ellipsis generates a pointer-typed
> >> expression, whereas convert_for_arg_passing generates a reference.
> >> Does correcting that inconsistency help?
> >
> > No (though I think it is a good idea anyway).
> 
> Perhaps then check_format_types could look through REFERENCE_TYPE,
> since there are no actual expressions of reference type.

It can be done in the caller as well like below, or in c-format.c's
  if (warn_format)
{
  /* FIXME: Rewrite all the internal functions in this file
 to use the ARGARRAY directly instead of constructing this
 temporary list.  */
  tree params = NULL_TREE;
  int i;
  for (i = nargs - 1; i >= 0; i--)
params = tree_cons (NULL_TREE, argarray[i], params);
  check_format_info (, params, arglocs);
}
I'm afraid the c-format.c has so many uses of the params list that it is
hard to tweak it anywhere else.

2018-03-09  Jason Merrill  
Jakub Jelinek  

PR c++/84076
* call.c (convert_arg_to_ellipsis): Instead of cp_build_addr_expr
build ADDR_EXPR with REFERENCE_TYPE.
(build_over_call): For purposes of check_function_arguments, if
argarray[j] is ADDR_EXPR with REFERENCE_TYPE created above, use
its operand rather than the argument itself.

* g++.dg/warn/Wformat-2.C: New test.

--- gcc/cp/call.c.jj2018-03-09 09:01:32.017423737 +0100
+++ gcc/cp/call.c   2018-03-09 18:12:34.973494714 +0100
@@ -7209,7 +7209,7 @@ convert_arg_to_ellipsis (tree arg, tsubs
 "passing objects of non-trivially-copyable "
 "type %q#T through %<...%> is conditionally supported",
 arg_type);
- return cp_build_addr_expr (arg, complain);
+ return build1 (ADDR_EXPR, build_reference_type (arg_type), arg);
}
   /* Build up a real lvalue-to-rvalue conversion in case the
 copy constructor is trivial but not callable.  */
@@ -8018,7 +8018,15 @@ build_over_call (struct z_candidate *can
   tree *fargs = (!nargs ? argarray
: (tree *) alloca (nargs * sizeof (tree)));
   for (j = 0; j < nargs; j++)
-   fargs[j] = maybe_constant_value (argarray[j]);
+   {
+ /* For -Wformat undo the implicit passing by hidden reference
+done by convert_arg_to_ellipsis.  */
+ if (TREE_CODE (argarray[j]) == ADDR_EXPR
+ && TREE_CODE (TREE_TYPE (argarray[j])) == REFERENCE_TYPE)
+   fargs[j] = TREE_OPERAND (argarray[j], 0);
+ else
+   fargs[j] = maybe_constant_value (argarray[j]);
+   }
 
   warned_p = check_function_arguments (input_location, fn, TREE_TYPE (fn),
   nargs, fargs, NULL);
--- gcc/testsuite/g++.dg/warn/Wformat-2.C.jj2018-03-09 17:59:57.098113181 
+0100
+++ gcc/testsuite/g++.dg/warn/Wformat-2.C   2018-03-09 17:59:57.098113181 
+0100
@@ -0,0 +1,17 @@
+// PR c++/84076
+// { dg-do compile }
+// { dg-options "-Wformat" }
+
+struct S { ~S (); };
+struct T { T (); T (const T &); };
+
+void
+foo ()
+{
+  S s;
+  T t;
+  __builtin_printf ("%s\n", s);// { dg-warning "format '%s' expects 
argument of type 'char\\*', but argument 2 has type 'S'" }
+  __builtin_printf ("%s\n", t);// { dg-warning "format '%s' expects 
argument of type 'char\\*', but argument 2 has type 'T'" }
+  __builtin_printf ("%s\n", );// { dg-warning "format '%s' expects argument 
of type 'char\\*', but argument 2 has type 'S\\*'" }
+  __builtin_printf ("%s\n", );// { dg-warning "format '%s' expects argument 
of type 'char\\*', but argument 2 has type 'T\\*'" }
+}


   

Re: [PATCH] Fix PR84777

2018-03-09 Thread Jakub Jelinek
On Fri, Mar 09, 2018 at 02:23:13PM +0100, Richard Biener wrote:
> 
> With -Os we inhibit most loop header copying which in turn will disable
> any vectorization attempt either via -fopenmp or via -ftree-vectorize.
> 
> The following makes sure the aggressive gate in 
> should_duplicate_loop_header_p is not applied for force-vectorize loops.
> There's still PARAM_MAX_LOOP_HEADER_INSNS limiting growth.
> 
> This reportedly makes -fopenmp -Os vectorize loops properly.
> 
> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> Is this OK for trunk (and branches?) or do we want sth different
> in the end?

LGTM.

> 2018-03-09  Richard Biener  
> 
>   PR tree-optimization/84777
>   * tree-ssa-loop-ch.c (should_duplicate_loop_header_p): For
>   force-vectorize loops ignore whether we are optimizing for size.
> 
> Index: gcc/tree-ssa-loop-ch.c
> ===
> --- gcc/tree-ssa-loop-ch.c(revision 258380)
> +++ gcc/tree-ssa-loop-ch.c(working copy)
> @@ -57,7 +57,8 @@ should_duplicate_loop_header_p (basic_bl
>   be true, since quite often it is possible to verify that the condition 
> is
>   satisfied in the first iteration and therefore to eliminate it.  Jump
>   threading handles these cases now.  */
> -  if (optimize_loop_for_size_p (loop))
> +  if (optimize_loop_for_size_p (loop)
> +  && !loop->force_vectorize)
>  {
>if (dump_file && (dump_flags & TDF_DETAILS))
>   fprintf (dump_file,

Jakub


Re: [C++ PATCH] For -Wformat purposes use ellipsis args before class to pointer to class conversion (PR c++/84076)

2018-03-09 Thread Jason Merrill
OK.

On Fri, Mar 9, 2018 at 12:21 PM, Jakub Jelinek  wrote:
> On Fri, Mar 09, 2018 at 11:28:47AM -0500, Jason Merrill wrote:
>> On Thu, Mar 8, 2018 at 4:26 PM, Jakub Jelinek  wrote:
>> > On Thu, Mar 08, 2018 at 04:20:40PM -0500, Jason Merrill wrote:
>> >> On Thu, Mar 8, 2018 at 1:08 PM, Jakub Jelinek  wrote:
>> >> > The reporter complains that -Wformat incorrectly reports std::string* 
>> >> > passed
>> >> > to "%s" format rather than std::string, which is what the user did.
>> >> >
>> >> > This transformation of non-trivial copy init or dtor classes in 
>> >> > ellipsis is
>> >> > done by convert_arg_to_ellipsis; that function does many changes and all
>> >> > but this one look desirable for the 
>> >> > -Wnonnull/-Wformat/-Wsentinel/-Wrestrict
>> >> > warnings.  We prepare a special argument vector in any case, so this 
>> >> > patch
>> >> > just arranges to undo what convert_arg_to_ellipsis did for the classes.
>> >> > I think -Wnonnull shouldn't care, because when passing such a class by
>> >> > value, it will be non-NULL (and -Wnonnull looks only for literal NULLs
>> >> > anyway), -Wrestrict only cares about named arguments and -Wsentinel only
>> >> > cares about NULL arguments passed to ellipsis.
>> >>
>> >> I notice that convert_arg_to_ellipsis generates a pointer-typed
>> >> expression, whereas convert_for_arg_passing generates a reference.
>> >> Does correcting that inconsistency help?
>> >
>> > No (though I think it is a good idea anyway).
>>
>> Perhaps then check_format_types could look through REFERENCE_TYPE,
>> since there are no actual expressions of reference type.
>
> It can be done in the caller as well like below, or in c-format.c's
>   if (warn_format)
> {
>   /* FIXME: Rewrite all the internal functions in this file
>  to use the ARGARRAY directly instead of constructing this
>  temporary list.  */
>   tree params = NULL_TREE;
>   int i;
>   for (i = nargs - 1; i >= 0; i--)
> params = tree_cons (NULL_TREE, argarray[i], params);
>   check_format_info (, params, arglocs);
> }
> I'm afraid the c-format.c has so many uses of the params list that it is
> hard to tweak it anywhere else.
>
> 2018-03-09  Jason Merrill  
> Jakub Jelinek  
>
> PR c++/84076
> * call.c (convert_arg_to_ellipsis): Instead of cp_build_addr_expr
> build ADDR_EXPR with REFERENCE_TYPE.
> (build_over_call): For purposes of check_function_arguments, if
> argarray[j] is ADDR_EXPR with REFERENCE_TYPE created above, use
> its operand rather than the argument itself.
>
> * g++.dg/warn/Wformat-2.C: New test.
>
> --- gcc/cp/call.c.jj2018-03-09 09:01:32.017423737 +0100
> +++ gcc/cp/call.c   2018-03-09 18:12:34.973494714 +0100
> @@ -7209,7 +7209,7 @@ convert_arg_to_ellipsis (tree arg, tsubs
>  "passing objects of non-trivially-copyable "
>  "type %q#T through %<...%> is conditionally supported",
>  arg_type);
> - return cp_build_addr_expr (arg, complain);
> + return build1 (ADDR_EXPR, build_reference_type (arg_type), arg);
> }
>/* Build up a real lvalue-to-rvalue conversion in case the
>  copy constructor is trivial but not callable.  */
> @@ -8018,7 +8018,15 @@ build_over_call (struct z_candidate *can
>tree *fargs = (!nargs ? argarray
> : (tree *) alloca (nargs * sizeof (tree)));
>for (j = 0; j < nargs; j++)
> -   fargs[j] = maybe_constant_value (argarray[j]);
> +   {
> + /* For -Wformat undo the implicit passing by hidden reference
> +done by convert_arg_to_ellipsis.  */
> + if (TREE_CODE (argarray[j]) == ADDR_EXPR
> + && TREE_CODE (TREE_TYPE (argarray[j])) == REFERENCE_TYPE)
> +   fargs[j] = TREE_OPERAND (argarray[j], 0);
> + else
> +   fargs[j] = maybe_constant_value (argarray[j]);
> +   }
>
>warned_p = check_function_arguments (input_location, fn, TREE_TYPE 
> (fn),
>nargs, fargs, NULL);
> --- gcc/testsuite/g++.dg/warn/Wformat-2.C.jj2018-03-09 17:59:57.098113181 
> +0100
> +++ gcc/testsuite/g++.dg/warn/Wformat-2.C   2018-03-09 17:59:57.098113181 
> +0100
> @@ -0,0 +1,17 @@
> +// PR c++/84076
> +// { dg-do compile }
> +// { dg-options "-Wformat" }
> +
> +struct S { ~S (); };
> +struct T { T (); T (const T &); };
> +
> +void
> +foo ()
> +{
> +  S s;
> +  T t;
> +  __builtin_printf ("%s\n", s);// { dg-warning "format '%s' expects 
> argument of type 'char\\*', but argument 2 has type 'S'" }
> +  __builtin_printf ("%s\n", t);// { dg-warning "format '%s' expects 
> argument of type 'char\\*', but argument 2 has type 'T'" }
> +  

Re: [og7] Update nvptx_fork/join barrier placement

2018-03-09 Thread Cesar Philippidis
On 03/09/2018 08:21 AM, Tom de Vries wrote:
> On 03/09/2018 12:31 AM, Cesar Philippidis wrote:
>> Nvidia Volta GPUs now support warp-level synchronization.
> 
> Well, let's try to make that statement a bit more precise.
> 
> All Nvidia architectures have supported synchronization of threads in a
> warp on a very basic level: by means of convergence (and unfortunately,
> we've seen that this is very error-prone).
>
> What is new in ptx 6.0 combined with sm_70 is the ability to sync
> divergent threads without having to converge, f.i. by using new
> instructions bar.warp.sync and barrier.sync.

Yes. The major difference sm_70 GPU architectures and earlier GPUs is
that sm_70 allows the user to explicitly synchronize divergent warps. At
least on Maxwell and Pascal, the PTX SASS compiler uses two instructions
to branch, SYNC and BRA. I think, SYNC guarantees that a warp is
convergent at the SYNC point, whereas BRA makes no such guarantees.

What's worse, once a warp has become divergent on sm_60 and earlier
GPUs, there's no way to reliably reconverge them. So, to avoid that
problem, it critical that the PTX SASS compiler use SYNC instructions
when possible. Fortunately, bar.warp.sync resolves the divergent warp
problem on sm_70+.

>> As such, the
>> semantics of legacy bar.sync instructions have slightly changed on newer
>> GPUs.
> 
> Before in ptx 3.1, we have for bar.sync:
> ...
> Barriers are executed on a per-warp basis as if all the threads in a
> warp are active. Thus, if any thread in a warp executes a bar
> instruction, it is as if all the threads in the warp have executed
> the bar instruction. All threads in the warp are stalled until the
> barrier completes, and the arrival count for the barrier is incremented
> by the warp size (not the number of active threads in the warp). In
> conditionally executed code, a bar instruction should only be used if it
> is known that all threads evaluate the condition identically (the warp
> does not diverge).
> ...
> 
> But in ptx 6.0, we have:
> ...
> bar.sync is equivalent to barrier.sync.aligned
> ...
> and:
> ...
> Instruction barrier has optional .aligned modifier. When specified, it
> indicates that all threads in CTA will execute the same barrier
> instruction. In conditionally executed code, an aligned barrier
> instruction should only be used if it is known that all threads in
> CTA evaluate the condition identically, otherwise behavior is undefined.
> ...
> 
> So, in ptx 3.1 bar.sync should be executed in convergent mode (all the
> threads in each warp executing the same). But in ptx 6.0, bar.sync
> should be executed in the mode that the whole CTA is executing the same
> code.
> 
> So going from the description of ptx, it seems indeed that the semantics
> of bar.sync has changed. That is however surprising, since it would
> break the forward compatibility that AFAIU is the idea behind ptx.
> 
> So for now my hope is that this is a documentation error.

I spent a lot of time debugging deadlocks with the vector length changes
and I have see no changes in the SASS code generated in the newer Nvidia
drivers when compared to the older ones, at lease with respect to the
barrier instructions. This isn't the first time I've seen
inconsistencies with thread synchronization in Nvidia's documentation.
For the longest time, the "CUDA Programming Guide" provided slightly
conflicting semantics for the __syncthreads() function, which ultimately
gets implemented as bar.sync in PTX.

>> The PTX JIT will now, occasionally, emit a warpsync instruction
>> immediately before a bar.sync for Volta GPUs. That implies that warps
>> must be convergent on entry to those threads barriers.
>>
> 
> That warps must be convergent on entry to bar.sync is already required
> by ptx 3.1.
> 
> [ And bar.warp.sync does not force convergence, so if the warpsync
> instruction you mention is equivalent to bar.warp.sync then your
> reasoning is incorrect. ]

I'm under the impression that bar.warp.sync converges all of the
non-exited threads in a warp. You'd still need to use bar.sync or some
variant of the new barrier instruction to converge the entire CTA. But
at the moment, we're still generating code that's backwards compatible
with sm_30.

>> The problem in og7, and trunk, is that GCC emits barrier instructions at
>> the wrong spots. E.g., consider the following OpenACC parallel region:
>>
>>    #pragma acc parallel loop worker
>>    for (i = 0; i < 10; i++)
>>  a[i] = i;
>>
>> At -O2, GCC generates the following PTX code:
>>
>>  {
>>  .reg.u32    %y;
>>  mov.u32 %y, %tid.y;
>>  setp.ne.u32 %r76, %y, 0;
>>  }
>>  {
>>  .reg.u32    %x;
>>  mov.u32 %x, %tid.x;
>>  setp.ne.u32 %r75, %x, 0;
>>  }
>>  @%r76   bra.uni $L6;
>>  @%r75   bra $L7;
>>  mov.u64 %r67, %ar0;
>>  // fork 2;
>>  

Re: patch to fix PR83712

2018-03-09 Thread Jeff Law
On 03/09/2018 09:16 AM, Vladimir Makarov wrote:
> The following patch fixes
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83712
> 
> It is another "cannot find a spill reg for reload" problem.  LRA has
> already a code splitting hard reg live ranges to avoid such problem. 
> This code is in LRA inheritance pass.  Unfortunately, the code does
> splitting for small class pseudos only.  This PR is a more complicated
> code and it is hard to adapt the inheritance sub-pass to reliably solve
> such problems.
> 
> To fix the PR, I added a sub-pass which works in very rare cases after
> we already found that we have no hard regs for a reload pseudo.  It
> tries to split a hard reg live range for the pseudo. After that it tries
> again to assign a hard reg to the pseudo.  The patch changes LRA-subpass
> flow for this.  I hope that the patch will finally solved all such
> problems but I am not sure to be completely certain.
> 
> The patch was bootstrapped and tested on x86-64 and ppc64.
Thanks.  Once my builders pick up the change (a few hours) we'll have
wider test coverage.  I can also test the change against a closely
related regression BZ (I don't have the number handy, but it's a few
years old and is definitely related).

Jeff


Re: [C++ PATCH] For -Wformat purposes use ellipsis args before class to pointer to class conversion (PR c++/84076)

2018-03-09 Thread Jason Merrill
On Thu, Mar 8, 2018 at 4:26 PM, Jakub Jelinek  wrote:
> On Thu, Mar 08, 2018 at 04:20:40PM -0500, Jason Merrill wrote:
>> On Thu, Mar 8, 2018 at 1:08 PM, Jakub Jelinek  wrote:
>> > The reporter complains that -Wformat incorrectly reports std::string* 
>> > passed
>> > to "%s" format rather than std::string, which is what the user did.
>> >
>> > This transformation of non-trivial copy init or dtor classes in ellipsis is
>> > done by convert_arg_to_ellipsis; that function does many changes and all
>> > but this one look desirable for the 
>> > -Wnonnull/-Wformat/-Wsentinel/-Wrestrict
>> > warnings.  We prepare a special argument vector in any case, so this patch
>> > just arranges to undo what convert_arg_to_ellipsis did for the classes.
>> > I think -Wnonnull shouldn't care, because when passing such a class by
>> > value, it will be non-NULL (and -Wnonnull looks only for literal NULLs
>> > anyway), -Wrestrict only cares about named arguments and -Wsentinel only
>> > cares about NULL arguments passed to ellipsis.
>>
>> I notice that convert_arg_to_ellipsis generates a pointer-typed
>> expression, whereas convert_for_arg_passing generates a reference.
>> Does correcting that inconsistency help?
>
> No (though I think it is a good idea anyway).

Perhaps then check_format_types could look through REFERENCE_TYPE,
since there are no actual expressions of reference type.

Jason


Re: [og7] Update nvptx_fork/join barrier placement

2018-03-09 Thread Tom de Vries

On 03/09/2018 12:31 AM, Cesar Philippidis wrote:

Nvidia Volta GPUs now support warp-level synchronization.


Well, let's try to make that statement a bit more precise.

All Nvidia architectures have supported synchronization of threads in a 
warp on a very basic level: by means of convergence (and unfortunately, 
we've seen that this is very error-prone).


What is new in ptx 6.0 combined with sm_70 is the ability to sync 
divergent threads without having to converge, f.i. by using new 
instructions bar.warp.sync and barrier.sync.



As such, the
semantics of legacy bar.sync instructions have slightly changed on newer
GPUs.


Before in ptx 3.1, we have for bar.sync:
...
Barriers are executed on a per-warp basis as if all the threads in a 
warp are active. Thus, if any thread in a warp executes a bar 
instruction, it is as if all the threads in the warp have executed
the bar instruction. All threads in the warp are stalled until the 
barrier completes, and the arrival count for the barrier is incremented 
by the warp size (not the number of active threads in the warp). In 
conditionally executed code, a bar instruction should only be used if it 
is known that all threads evaluate the condition identically (the warp 
does not diverge).

...

But in ptx 6.0, we have:
...
bar.sync is equivalent to barrier.sync.aligned
...
and:
...
Instruction barrier has optional .aligned modifier. When specified, it 
indicates that all threads in CTA will execute the same barrier 
instruction. In conditionally executed code, an aligned barrier 
instruction should only be used if it is known that all threads in

CTA evaluate the condition identically, otherwise behavior is undefined.
...

So, in ptx 3.1 bar.sync should be executed in convergent mode (all the 
threads in each warp executing the same). But in ptx 6.0, bar.sync 
should be executed in the mode that the whole CTA is executing the same 
code.


So going from the description of ptx, it seems indeed that the semantics 
of bar.sync has changed. That is however surprising, since it would 
break the forward compatibility that AFAIU is the idea behind ptx.


So for now my hope is that this is a documentation error.


The PTX JIT will now, occasionally, emit a warpsync instruction
immediately before a bar.sync for Volta GPUs. That implies that warps
must be convergent on entry to those threads barriers.



That warps must be convergent on entry to bar.sync is already required 
by ptx 3.1.


[ And bar.warp.sync does not force convergence, so if the warpsync 
instruction you mention is equivalent to bar.warp.sync then your 
reasoning is incorrect. ]



The problem in og7, and trunk, is that GCC emits barrier instructions at
the wrong spots. E.g., consider the following OpenACC parallel region:

   #pragma acc parallel loop worker
   for (i = 0; i < 10; i++)
 a[i] = i;

At -O2, GCC generates the following PTX code:

 {
 .reg.u32%y;
 mov.u32 %y, %tid.y;
 setp.ne.u32 %r76, %y, 0;
 }
 {
 .reg.u32%x;
 mov.u32 %x, %tid.x;
 setp.ne.u32 %r75, %x, 0;
 }
 @%r76   bra.uni $L6;
 @%r75   bra $L7;
 mov.u64 %r67, %ar0;
 // fork 2;
 cvta.shared.u64 %r74, __oacc_bcast;
 st.u64  [%r74], %r67;
$L7:
$L6:
 @%r75   bra $L5;
 // forked 2;
 bar.sync0;
 cvta.shared.u64 %r73, __oacc_bcast;
 ld.u64  %r67, [%r73];
 mov.u32 %r62, %ntid.y;
 mov.u32 %r63, %tid.y;
 setp.gt.s32 %r68, %r63, 9;
 @%r68   bra $L2;
 mov.u32 %r55, %r63;
 cvt.s64.s32 %r69, %r62;
 shl.b64 %r59, %r69, 2;
 cvt.s64.s32 %r70, %r55;
 shl.b64 %r71, %r70, 2;
 add.u64 %r58, %r67, %r71;
$L3:
 st.u32  [%r58], %r55;
 add.u32 %r55, %r55, %r62;
 add.u64 %r58, %r58, %r59;
 setp.le.s32 %r72, %r55, 9;
 @%r72   bra $L3;
$L2:
 bar.sync1;
 // joining 2;
$L5:
 // join 2;
 ret;

Note the bar.sync instructions placed immediately after the forked
comment and before the joining comment. The problem here is that branch
above the forked comment guarantees that the warps are not synchronous
(when vector_length > 1, which is always the case). 


This is already advised against in ptx 3.1, so yes, we should fix this.


Likewise, bar.sync
instruction before joining should be placed after label L5 in order to
allow all of the threads in the warp to reach it.



Agreed.


The attached patch teaches the nvptx to make those adjustments.


Can you show me a diff of the ptx for the test-case above for trunk?


It
doesn't cause any regressions on legacy GPUs, but it does 

patch to fix PR83712

2018-03-09 Thread Vladimir Makarov

The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83712

It is another "cannot find a spill reg for reload" problem.  LRA has 
already a code splitting hard reg live ranges to avoid such problem.  
This code is in LRA inheritance pass.  Unfortunately, the code does 
splitting for small class pseudos only.  This PR is a more complicated 
code and it is hard to adapt the inheritance sub-pass to reliably solve 
such problems.


To fix the PR, I added a sub-pass which works in very rare cases after 
we already found that we have no hard regs for a reload pseudo.  It 
tries to split a hard reg live range for the pseudo. After that it tries 
again to assign a hard reg to the pseudo.  The patch changes LRA-subpass 
flow for this.  I hope that the patch will finally solved all such 
problems but I am not sure to be completely certain.


The patch was bootstrapped and tested on x86-64 and ppc64.

Committed as rev. 258390.

Index: ChangeLog
===
--- ChangeLog	(revision 258389)
+++ ChangeLog	(working copy)
@@ -1,3 +1,23 @@
+2018-03-09  Vladimir Makarov  
+
+	PR target/83712
+	* lra-assigns.c (assign_by_spills): Return a flag of reload
+	assignment failure.  Do not process the reload assignment
+	failures.  Do not spill other reload pseudos if they has the same
+	reg class.
+	(lra_assign): Add a return arg.  Set up from the result of
+	assign_by_spills call.
+	(find_reload_regno_insns, lra_split_hard_reg_for): New functions.
+	* lra-constraints.c (split_reg): Add a new arg.  Use it instead of
+	usage_insns if it is not NULL.
+	(spill_hard_reg_in_range): New function.
+	(split_if_necessary, inherit_in_ebb): Pass a new arg to split_reg.
+	* lra-int.h (spill_hard_reg_in_range, lra_split_hard_reg_for): New
+	function prototypes.
+	(lra_assign): Change prototype.
+	* lra.c (lra): Add code to deal with fails by splitting hard reg
+	live ranges.
+
 2018-03-09  Kyrylo Tkachov  
 
 	PR target/83193
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog	(revision 258389)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-03-09  Vladimir Makarov  
+
+	PR target/83712
+	* gcc.target/arm/pr83712.c: New.
+
 2018-03-09  Richard Biener  
 
 	PR tree-optimization/84775
Index: lra-int.h
===
--- lra-int.h	(revision 258389)
+++ lra-int.h	(working copy)
@@ -356,6 +356,7 @@ extern bool lra_constrain_insn (rtx_insn
 extern bool lra_constraints (bool);
 extern void lra_constraints_init (void);
 extern void lra_constraints_finish (void);
+extern bool spill_hard_reg_in_range (int, enum reg_class, rtx_insn *, rtx_insn *);
 extern void lra_inheritance (void);
 extern bool lra_undo_inheritance (void);
 
@@ -389,8 +390,8 @@ extern void lra_setup_reload_pseudo_pref
 extern int lra_assignment_iter;
 extern int lra_assignment_iter_after_spill;
 extern void lra_setup_reg_renumber (int, int, bool);
-extern bool lra_assign (void);
-
+extern bool lra_assign (bool &);
+extern void lra_split_hard_reg_for (void);
 
 /* lra-coalesce.c: */
 
Index: lra.c
===
--- lra.c	(revision 258389)
+++ lra.c	(working copy)
@@ -2460,38 +2460,53 @@ lra (FILE *f)
 	}
 	  if (live_p)
 	lra_clear_live_ranges ();
-	  /* We need live ranges for lra_assign -- so build them.  But
-	 don't remove dead insns or change global live info as we
-	 can undo inheritance transformations after inheritance
-	 pseudo assigning.  */
-	  lra_create_live_ranges (true, false);
-	  live_p = true;
-	  /* If we don't spill non-reload and non-inheritance pseudos,
-	 there is no sense to run memory-memory move coalescing.
-	 If inheritance pseudos were spilled, the memory-memory
-	 moves involving them will be removed by pass undoing
-	 inheritance.  */
-	  if (lra_simple_p)
-	lra_assign ();
-	  else
+	  bool fails_p;
+	  do
 	{
-	  bool spill_p = !lra_assign ();
-
-	  if (lra_undo_inheritance ())
-		live_p = false;
-	  if (spill_p)
+	  /* We need live ranges for lra_assign -- so build them.
+		 But don't remove dead insns or change global live
+		 info as we can undo inheritance transformations after
+		 inheritance pseudo assigning.  */
+	  lra_create_live_ranges (true, false);
+	  live_p = true;
+	  /* If we don't spill non-reload and non-inheritance
+		 pseudos, there is no sense to run memory-memory move
+		 coalescing.  If inheritance pseudos were spilled, the
+		 memory-memory moves involving them will be removed by
+		 pass undoing inheritance.  */
+	  if (lra_simple_p)
+		lra_assign (fails_p);
+	  else
 		{
-		  if (! live_p)
+		  bool spill_p = !lra_assign (fails_p);
+		  
+		  if (lra_undo_inheritance ())
+		live_p = false;
+		  if (spill_p 

Re: [C++ PATCH] Don't redefine __* builtins (PR c++/84724, take 2)

2018-03-09 Thread Jason Merrill
OK.

On Fri, Mar 9, 2018 at 6:05 AM, Jakub Jelinek  wrote:
> On Thu, Mar 08, 2018 at 04:06:24PM -0500, Jason Merrill wrote:
>> On Thu, Mar 8, 2018 at 1:01 PM, Jakub Jelinek  wrote:
>> > The C FE just warns and doesn't override builtins, but C++ FE
>> > on say int __builtin_trap (); will override the builtin, so later
>> > builtin_decl_explicit will return the bogus user function which e.g.
>> > doesn't have any merged attributes, can have different arguments or
>> > return type etc.
>> >
>> > This patch prevents that; generally the builtins we want to override
>> > are the DECL_ANTICIPATED which we handle properly earlier.
>> >
>> > The testcase in the PR ICEs not because of the argument mismatch (there is
>> > none in this case) or return value mismatch (because nothing cares about 
>> > the
>> > return value), but because the new decl lacks noreturn attribute and GCC
>> > relies on builtin_decl_explicit (BUILT_IN_TRAP) to be really noreturn.
>> >
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> > Or shall we error on these, or ignore the name checks and just
>> > if (DECL_BUILT_IN (olddecl)) return NULL_TREE;, something else?
>>
>> Seems like returning NULL_TREE means that we overload the built-in,
>> which also seems undesirable; what if we return olddecl unmodified?
>> It would also be good to improve the diagnostic to say that we're
>> ignoring the declaration.  Perhaps as a permerror.
>
> So like this (if it passes bootstrap/regtest, though I think we don't have
> any tests other than these new ones that cover it anyway, so I don't expect
> issues)?
>
> 2018-03-09  Jakub Jelinek  
>
> PR c++/84724
> * decl.c (duplicate_decls): Don't override __* prefixed builtins
> except for __[^b]*_chk, instead issue permerror and for -fpermissive
> also a note and return olddecl.
>
> * g++.dg/ext/pr84724.C: New test.
>
> --- gcc/cp/decl.c.jj2018-03-08 21:53:44.989559552 +0100
> +++ gcc/cp/decl.c   2018-03-09 11:53:58.557156637 +0100
> @@ -1566,6 +1566,33 @@ next_arg:;
>|| compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
>  TYPE_ARG_TYPES (TREE_TYPE (olddecl
> {
> + /* Don't really override olddecl for __* prefixed builtins
> +except for __[^b]*_chk, the compiler might be using those
> +explicitly.  */
> + if (DECL_BUILT_IN (olddecl))
> +   {
> + tree id = DECL_NAME (olddecl);
> + const char *name = IDENTIFIER_POINTER (id);
> + size_t len;
> +
> + if (name[0] == '_'
> + && name[1] == '_'
> + && (strncmp (name + 2, "builtin_",
> +  strlen ("builtin_")) == 0
> + || (len = strlen (name)) <= strlen ("___chk")
> + || memcmp (name + len - strlen ("_chk"),
> +"_chk", strlen ("_chk") + 1) != 0))
> +   {
> + if (permerror (DECL_SOURCE_LOCATION (newdecl),
> +"new declaration %q#D ambiguates 
> built-in"
> +" declaration %q#D", newdecl, olddecl)
> + && flag_permissive)
> +   inform (DECL_SOURCE_LOCATION (newdecl),
> +   "ignoring the %q#D declaration", newdecl);
> + return olddecl;
> +   }
> +   }
> +
>   /* A near match; override the builtin.  */
>
>   if (TREE_PUBLIC (newdecl))
> --- gcc/testsuite/g++.dg/ext/pr84724-1.C.jj 2018-03-09 11:44:49.037993207 
> +0100
> +++ gcc/testsuite/g++.dg/ext/pr84724-1.C2018-03-09 11:57:11.599204801 
> +0100
> @@ -0,0 +1,14 @@
> +// PR c++/84724
> +// { dg-do compile }
> +// { dg-options "-O3 -fpermissive" }
> +
> +int __builtin_trap (); // { dg-warning "ambiguates built-in 
> declaration" }
> +   // { dg-message "ignoring the 'int 
> __builtin_trap\\(\\)' declaration" "" { target *-*-* } .-1 }
> +
> +int
> +foo ()
> +{
> +  int b;
> +  int c ();  // { dg-warning "invalid conversion from" }
> +  return b %= b ? c : 0;
> +}
> --- gcc/testsuite/g++.dg/ext/pr84724-2.C.jj 2018-03-09 11:57:26.017208398 
> +0100
> +++ gcc/testsuite/g++.dg/ext/pr84724-2.C2018-03-09 11:57:40.775212078 
> +0100
> @@ -0,0 +1,14 @@
> +// PR c++/84724
> +// { dg-do compile }
> +// { dg-options "-O3 -fpermissive -w" }
> +
> +int __builtin_trap (); // { dg-bogus "ambiguates built-in 
> declaration" }
> +   // { dg-bogus "ignoring the 'int 
> __builtin_trap\\(\\)' declaration" "" { target *-*-* } .-1 }
> +
> +int
> +foo ()
> +{
> +  int b;
> +  int c ();  // { dg-bogus 

Re: [C++ Patch] PR 71169 ("[7/8 Regression] ICE on invalid C++ code in pop_nested_class"), PR 71832 and more (Take 2)

2018-03-09 Thread Jason Merrill
Hmm, actually I think any_erroneous_template_args will return false
for error_mark_node.  Just moving the error_mark_node check up should
correct that.  OK with that change.

On Fri, Mar 9, 2018 at 10:40 AM, Paolo Carlini  wrote:
> Hi,
>
> On 09/03/2018 16:17, Jason Merrill wrote:
>>
>> You should be able to say
>>
>>if (any_erroneous_template_args (type))
>
> Indeed, that's what I tried to express earlier today, after a cappuccino ;)
>>
>> and I don't think you need the goto; the normal logic should handle
>> the truncated vecs fine.
>
> It should, otherwise something needs fixing anyway. And since by that time
> we are in error recovery it doesn't make any sense to micro-optimize
> anything. Thus, I'm re-testing (previous iteration was Ok) with the below.
> Ok if it passes?
>
> Thanks,
> Paolo.
>
> ///


Re: Add "native" as a valid option value for -march= on arm (PR driver/83193).

2018-03-09 Thread Martin Liška

On 03/09/2018 04:43 PM, Kyrill Tkachov wrote:


On 22/02/18 12:15, Martin Liška wrote:

On 02/22/2018 11:59 AM, Kyrill Tkachov wrote:

Hi Martin,

On 22/02/18 08:51, Martin Liška wrote:

On 02/21/2018 10:23 AM, Kyrill Tkachov wrote:

On arm we also support "native" as a value for -mcpu and -mtune. These are both 
handled by
the arm_print_hint_for_cpu_option function in the same file. Can you please add 
this snippet
to them as well?

Hi.

Can you please test for me attached follow up patch?

I've bootstrapped and tested this patch on a native and a cross compiler and 
confirmed
the help string for -mcpu behaves as intended.

This patch is ok for trunk.
Thanks,
Kyrill

Thanks for review. May I please ask you for taking look at:
2b) point in:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83193#c6

We should fix it (if possible) for GCC 8 as it's regression.


Thanks for the reminders Martin.
I've committed a patch to fix it with r258389.


I thank you for taking care of the ARM-specific issue. Let me adjust milestone
of the PR to 9. The rest of issue seen can't be fixed now.

Martin



Kyrill


Thanks,
Martin


Thanks,
Martin




Re: Add "native" as a valid option value for -march= on arm (PR driver/83193).

2018-03-09 Thread Kyrill Tkachov


On 22/02/18 12:15, Martin Liška wrote:

On 02/22/2018 11:59 AM, Kyrill Tkachov wrote:

Hi Martin,

On 22/02/18 08:51, Martin Liška wrote:

On 02/21/2018 10:23 AM, Kyrill Tkachov wrote:

On arm we also support "native" as a value for -mcpu and -mtune. These are both 
handled by
the arm_print_hint_for_cpu_option function in the same file. Can you please add 
this snippet
to them as well?

Hi.

Can you please test for me attached follow up patch?

I've bootstrapped and tested this patch on a native and a cross compiler and 
confirmed
the help string for -mcpu behaves as intended.

This patch is ok for trunk.
Thanks,
Kyrill

Thanks for review. May I please ask you for taking look at:
2b) point in:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83193#c6

We should fix it (if possible) for GCC 8 as it's regression.


Thanks for the reminders Martin.
I've committed a patch to fix it with r258389.

Kyrill


Thanks,
Martin


Thanks,
Martin




Re: [C++ Patch] PR 71169 ("[7/8 Regression] ICE on invalid C++ code in pop_nested_class"), PR 71832 and more (Take 2)

2018-03-09 Thread Paolo Carlini

Hi,

On 09/03/2018 16:17, Jason Merrill wrote:

You should be able to say

   if (any_erroneous_template_args (type))

Indeed, that's what I tried to express earlier today, after a cappuccino ;)

and I don't think you need the goto; the normal logic should handle
the truncated vecs fine.
It should, otherwise something needs fixing anyway. And since by that 
time we are in error recovery it doesn't make any sense to 
micro-optimize anything. Thus, I'm re-testing (previous iteration was 
Ok) with the below. Ok if it passes?


Thanks,
Paolo.

///
Index: cp/cp-tree.h
===
--- cp/cp-tree.h(revision 258381)
+++ cp/cp-tree.h(working copy)
@@ -6558,6 +6558,7 @@ extern int processing_template_parmlist;
 extern bool dependent_type_p   (tree);
 extern bool dependent_scope_p  (tree);
 extern bool any_dependent_template_arguments_p  (const_tree);
+extern bool any_erroneous_template_args_p   (const_tree);
 extern bool dependent_template_p   (tree);
 extern bool dependent_template_id_p(tree, tree);
 extern bool type_dependent_expression_p(tree);
Index: cp/parser.c
===
--- cp/parser.c (revision 258381)
+++ cp/parser.c (working copy)
@@ -22669,6 +22669,16 @@ cp_parser_class_specifier_1 (cp_parser* parser)
   cp_default_arg_entry *e;
   tree save_ccp, save_ccr;
 
+  if (any_erroneous_template_args_p (type))
+   {
+ /* Skip default arguments, NSDMIs, etc, in order to improve
+error recovery (c++/71169, c++/71832).  */
+ vec_safe_truncate (unparsed_funs_with_default_args, 0);
+ vec_safe_truncate (unparsed_nsdmis, 0);
+ vec_safe_truncate (unparsed_classes, 0);
+ vec_safe_truncate (unparsed_funs_with_definitions, 0);
+   }
+
   /* In a first pass, parse default arguments to the functions.
 Then, in a second pass, parse the bodies of the functions.
 This two-phased approach handles cases like:
Index: cp/pt.c
===
--- cp/pt.c (revision 258381)
+++ cp/pt.c (working copy)
@@ -25048,6 +25048,38 @@ any_dependent_template_arguments_p (const_tree arg
   return false;
 }
 
+/* Returns true if ARGS contains any errors.  */
+
+bool
+any_erroneous_template_args_p (const_tree args)
+{
+  int i;
+  int j;
+
+  if (args && TREE_CODE (args) != TREE_VEC)
+{
+  if (tree ti = get_template_info (args))
+   args = TI_ARGS (ti);
+  else
+   args = NULL_TREE;
+}
+
+  if (!args)
+return false;
+  if (args == error_mark_node)
+return true;
+
+  for (i = 0; i < TMPL_ARGS_DEPTH (args); ++i)
+{
+  const_tree level = TMPL_ARGS_LEVEL (args, i + 1);
+  for (j = 0; j < TREE_VEC_LENGTH (level); ++j)
+   if (error_operand_p (TREE_VEC_ELT (level, j)))
+ return true;
+}
+
+  return false;
+}
+
 /* Returns TRUE if the template TMPL is type-dependent.  */
 
 bool
Index: testsuite/g++.dg/cpp0x/pr71169-2.C
===
--- testsuite/g++.dg/cpp0x/pr71169-2.C  (nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71169-2.C  (working copy)
@@ -0,0 +1,19 @@
+// { dg-do compile { target c++11 } }
+
+template  class A {  // { dg-error "declared" }
+  template  void m_fn1() {
+m_fn1();
+}
+};
+
+template
+struct B
+{
+  int f(int = 0) { return 0; }
+};
+
+int main()
+{
+  B b;
+  return b.f();
+}
Index: testsuite/g++.dg/cpp0x/pr71169.C
===
--- testsuite/g++.dg/cpp0x/pr71169.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71169.C(working copy)
@@ -0,0 +1,7 @@
+// { dg-do compile { target c++11 } }
+
+template  class A {  // { dg-error "declared" }
+  template  void m_fn1() {
+m_fn1();
+}
+};
Index: testsuite/g++.dg/cpp0x/pr71832.C
===
--- testsuite/g++.dg/cpp0x/pr71832.C(nonexistent)
+++ testsuite/g++.dg/cpp0x/pr71832.C(working copy)
@@ -0,0 +1,7 @@
+// { dg-do compile { target c++11 } }
+
+template < typename decltype (0) > struct A  // { dg-error "expected|two or 
more" }
+{ 
+  void foo () { baz (); }
+  template < typename ... S > void baz () {}
+};


[PATCH][arm] PR target/83193: Do not print arch/cpu hints twice on invalid -march/-mcpu

2018-03-09 Thread Kyrill Tkachov

Hi all,

Currently when handling an invalid -march or -mcpu option on a toolchain 
without an explicit --with-mode configuration
and compiling without an explicit -mthumb or -marm the arm specs end up calling 
arm_target_thumb_only to determine
the "thumbness" of the target, which involves parsing the architecture or cpu 
name. But the functions doing that
parsing also emit error messages and hints on invalid arguments. Later when we 
parse the architecture or cpu string to
as part of the canonicalisation process (arm_canon_arch_option) we end up 
emitting the errors again.

The solution in this patch is to silence the errors during the 
arm_target_thumb_only processing so that they are not emitted
twice. arm_canon_arch_option is guaranteed to run as well, so it can emit the 
errors and hints that it needs.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Checked that we emit the arch/cpu hints for invalid -march/-mcpu options only once when 
no "thumbness" options were specified
during configuration or invocation.

Committing to trunk.
Thanks,
Kyrill

2018-03-09  Kyrylo Tkachov  

PR target/83193
* common/config/arm/arm-common.c (arm_parse_arch_option_name):
Accept complain bool parameter.  Only emit errors if it is true.
(arm_parse_cpu_option_name): Likewise.
(arm_target_thumb_only): Adjust callers of the above.
* config/arm/arm-protos.h (arm_parse_cpu_option_name): Adjust
prototype to take a default true bool parameter.
(arm_parse_arch_option_name): Likewise.
diff --git a/gcc/common/config/arm/arm-common.c b/gcc/common/config/arm/arm-common.c
index a404d4b1562da7fc66b0f439f2459a90b62a170a..76c357b4258b68d0c2cd78cb54819c280d50963a 100644
--- a/gcc/common/config/arm/arm-common.c
+++ b/gcc/common/config/arm/arm-common.c
@@ -279,7 +279,8 @@ arm_target_thumb_only (int argc, const char **argv)
   if (arch)
 {
   const arch_option *arch_opt
-	= arm_parse_arch_option_name (all_architectures, "-march", arch);
+	= arm_parse_arch_option_name (all_architectures, "-march", arch,
+  false);
 
   if (arch_opt && !check_isa_bits_for (arch_opt->common.isa_bits,
 	   isa_bit_notm))
@@ -288,7 +289,7 @@ arm_target_thumb_only (int argc, const char **argv)
   else if (cpu)
 {
   const cpu_option *cpu_opt
-	= arm_parse_cpu_option_name (all_cores, "-mcpu", cpu);
+	= arm_parse_cpu_option_name (all_cores, "-mcpu", cpu, false);
 
   if (cpu_opt && !check_isa_bits_for (cpu_opt->common.isa_bits,
 	  isa_bit_notm))
@@ -329,10 +330,11 @@ arm_print_hint_for_cpu_option (const char *target,
 /* Parse the base component of a CPU selection in LIST.  Return a
pointer to the entry in the architecture table.  OPTNAME is the
name of the option we are parsing and can be used if a diagnostic
-   is needed.  */
+   is needed.  If COMPLAIN is true (the default) emit error
+   messages and hints on invalid input.  */
 const cpu_option *
 arm_parse_cpu_option_name (const cpu_option *list, const char *optname,
-			   const char *target)
+			   const char *target, bool complain)
 {
   const cpu_option *entry;
   const char *end  = strchr (target, '+');
@@ -345,8 +347,11 @@ arm_parse_cpu_option_name (const cpu_option *list, const char *optname,
 	return entry;
 }
 
-  error_at (input_location, "unrecognized %s target: %s", optname, target);
-  arm_print_hint_for_cpu_option (target, list);
+  if (complain)
+{
+  error_at (input_location, "unrecognized %s target: %s", optname, target);
+  arm_print_hint_for_cpu_option (target, list);
+}
   return NULL;
 }
 
@@ -379,10 +384,11 @@ arm_print_hint_for_arch_option (const char *target,
 /* Parse the base component of a CPU or architecture selection in
LIST.  Return a pointer to the entry in the architecture table.
OPTNAME is the name of the option we are parsing and can be used if
-   a diagnostic is needed.  */
+   a diagnostic is needed.  If COMPLAIN is true (the default) emit error
+   messages and hints on invalid input.  */
 const arch_option *
 arm_parse_arch_option_name (const arch_option *list, const char *optname,
-			const char *target)
+			const char *target, bool complain)
 {
   const arch_option *entry;
   const char *end  = strchr (target, '+');
@@ -395,8 +401,11 @@ arm_parse_arch_option_name (const arch_option *list, const char *optname,
 	return entry;
 }
 
-  error_at (input_location, "unrecognized %s target: %s", optname, target);
-  arm_print_hint_for_arch_option (target, list);
+  if (complain)
+{
+  error_at (input_location, "unrecognized %s target: %s", optname, target);
+  arm_print_hint_for_arch_option (target, list);
+}
   return NULL;
 }
 
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2b5bb255aac2507a63cbe71277df16e21feb3e79..8537262ce644bbacd7a800c708bcd86eea93d2bd 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -542,9 +542,9 @@ extern const arch_option 

Re: [og7] vector_length extension part 1: generalize function and variable names

2018-03-09 Thread Cesar Philippidis
On 03/09/2018 07:29 AM, Thomas Schwinge wrote:

> On Thu, 1 Mar 2018 13:17:01 -0800, Cesar Philippidis  
> wrote:
>> To reduce the size of the final patch,
>> I've separated all of the misc. function and variable renaming into this
>> patch.
> 
> Yes, please always do such refactoring changes independently of other
> functionality changes.
> 
> 
>> This patch also introduces a new populate_offload_attrs function.
> 
> I'm seeing:
> 
> [...]/gcc/config/nvptx/nvptx.c: In function 'void nvptx_reorg()':
> [...]/gcc/config/nvptx/nvptx.c:4451:3: warning: 
> 'oa.offload_attrs::vector_length' may be used uninitialized in this function 
> [-Wmaybe-uninitialized]
>if (oa->vector_length == 0)
>^
> [...]/gcc/config/nvptx/nvptx.c:4516:21: note: 
> 'oa.offload_attrs::vector_length' was declared here
>offload_attrs oa;
>  ^
> 
> That must be "populate_offload_attrs" inlined into "nvptx_reorg".  I
> can't tell yet why it complains about "vector_length" only but not about
> the others.

I got lazy and just merged that function as-is. That warning will go
away once the reset of the vector length changes are in.

Cesar


Re: [og7] vector_length extension part 1: generalize function and variable names

2018-03-09 Thread Thomas Schwinge
Hi!

On Thu, 1 Mar 2018 13:17:01 -0800, Cesar Philippidis  
wrote:
> To reduce the size of the final patch,
> I've separated all of the misc. function and variable renaming into this
> patch.

Yes, please always do such refactoring changes independently of other
functionality changes.


> This patch also introduces a new populate_offload_attrs function.

I'm seeing:

[...]/gcc/config/nvptx/nvptx.c: In function 'void nvptx_reorg()':
[...]/gcc/config/nvptx/nvptx.c:4451:3: warning: 
'oa.offload_attrs::vector_length' may be used uninitialized in this function 
[-Wmaybe-uninitialized]
   if (oa->vector_length == 0)
   ^
[...]/gcc/config/nvptx/nvptx.c:4516:21: note: 
'oa.offload_attrs::vector_length' was declared here
   offload_attrs oa;
 ^

That must be "populate_offload_attrs" inlined into "nvptx_reorg".  I
can't tell yet why it complains about "vector_length" only but not about
the others.

For reference:

> --- a/gcc/config/nvptx/nvptx.c
> +++ b/gcc/config/nvptx/nvptx.c

> +/* Offloading function attributes.  */
> +
> +struct offload_attrs
> +{
> +  unsigned mask;
> +  int num_gangs;
> +  int num_workers;
> +  int vector_length;
> +  int max_workers;
> +};

> +static void
> +populate_offload_attrs (offload_attrs *oa)
> +{
> +  tree attr = oacc_get_fn_attrib (current_function_decl);
> +  tree dims = TREE_VALUE (attr);
> +  unsigned ix;
> +
> +  oa->mask = 0;
> +
> +  for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
> +{
> +  tree t = TREE_VALUE (dims);
> +  int size = (t == NULL_TREE) ? 0 : TREE_INT_CST_LOW (t);
> +  tree allowed = TREE_PURPOSE (dims);
> +
> +  if (size != 1 && !(allowed && integer_zerop (allowed)))
> + oa->mask |= GOMP_DIM_MASK (ix);
> +
> +  switch (ix)
> + {
> + case GOMP_DIM_GANG:
> +   oa->num_gangs = size;
> +   break;
> +
> + case GOMP_DIM_WORKER:
> +   oa->num_workers = size;
> +   break;
> +
> + case GOMP_DIM_VECTOR:
> +   oa->vector_length = size;
> +   break;
> + }
> +}
> +
> +  if (oa->vector_length == 0)
> +{
> +  /* FIXME: Need a more graceful way to handle large vector
> +  lengths in OpenACC routines.  */
> +  if (!lookup_attribute ("omp target entrypoint",
> +  DECL_ATTRIBUTES (current_function_decl)))
> + oa->vector_length = PTX_WARP_SIZE;
> +  else
> + oa->vector_length = PTX_VECTOR_LENGTH;
> +}
> +  if (oa->num_workers == 0)
> +oa->max_workers = PTX_CTA_SIZE / oa->vector_length;
> +  else
> +oa->max_workers = oa->num_workers;
> +}

> @@ -4435,27 +4513,19 @@ nvptx_reorg (void)
>  {
>/* If we determined this mask before RTL expansion, we could
>elide emission of some levels of forks and joins.  */
> -  unsigned mask = 0;
> -  tree dims = TREE_VALUE (attr);
> -  unsigned ix;
> +  offload_attrs oa;
>  
> -  for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
> - {
> -   int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
> -   tree allowed = TREE_PURPOSE (dims);
> +  populate_offload_attrs ();
>  
> -   if (size != 1 && !(allowed && integer_zerop (allowed)))
> - mask |= GOMP_DIM_MASK (ix);
> - }
>/* If there is worker neutering, there must be vector
>neutering.  Otherwise the hardware will fail.  */
> -  gcc_assert (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
> -   || (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
> +  gcc_assert (!(oa.mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
> +   || (oa.mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
>  
>/* Discover & process partitioned regions.  */
>parallel *pars = nvptx_discover_pars (_insn_map);
>nvptx_process_pars (pars);
> -  nvptx_neuter_pars (pars, mask, 0);
> +  nvptx_neuter_pars (pars, oa.mask, 0);
>delete pars;
>  }


Grüße
 Thomas


Re: [C++ Patch] PR 71169 ("[7/8 Regression] ICE on invalid C++ code in pop_nested_class"), PR 71832 and more (Take 2)

2018-03-09 Thread Jason Merrill
You should be able to say

  if (any_erroneous_template_args (type))

and I don't think you need the goto; the normal logic should handle
the truncated vecs fine.

On Fri, Mar 9, 2018 at 4:14 AM, Paolo Carlini  wrote:
> .. after a few hours of sleep, I realize that in principle we could also
> call any_erroneous_template_args_p directly. In that case however, I believe
> get_template_info does nothing 100% of the case, not sure we want to do
> that.
>
> Paolo.


Re: [PR84620] output symbolic entry_view as data2, not addr

2018-03-09 Thread Jason Merrill
On Fri, Mar 9, 2018 at 4:02 AM, Jakub Jelinek  wrote:
> On Fri, Mar 09, 2018 at 05:49:34AM -0300, Alexandre Oliva wrote:
>> @@ -27699,6 +27780,9 @@ dwarf2out_source_line (unsigned int line, unsigned 
>> int column,
>>   {
>> if (!RESETTING_VIEW_P (table->view))
>>   {
>> +   table->symviews_since_reset++;
>> +   if (table->symviews_since_reset > symview_upper_bound)
>> + symview_upper_bound = table->symviews_since_reset;
>
> You could have used
>   symview_upper_bound
> = MAX (symview_upper_bound, table->symviews_since_reset);
>
> Also, I'd say there should be
>   symview_upper_bound = 0;
> in dwarf2out_c_finalize for better behavior of GCC JIT.
>
> Otherwise LGTM, but please give Jason a day to comment on.

OK.

Jason


Re: Patch ping (Re: [PATCH PR82965/PR83991]Fix invalid profile count in vectorization peeling)

2018-03-09 Thread Bin.Cheng
On Fri, Mar 9, 2018 at 10:25 AM, Paul Hua  wrote:
> It's looks fixed
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82965#c12  on mips64el.
Hmm, is it fixed? or is it exposed now on mips64el?  I read the latter
from the comment.
I think the issue is like explained, but haven't dug into when/why it
is triggered in vect_peeling only for some targets.
Honza asked couple questions about my patch offline, I will revisit
the patch, see how to address
his concern.

Thanks,
bin
>
> Thanks.
>
> On Mon, Feb 26, 2018 at 8:02 PM, Bin.Cheng  wrote:
>> Ping^2
>>
>> Thanks,
>> bin
>>
>> On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek  wrote:
>>> Hi!
>>>
>>> Honza, do you think you could have a look at this P1 fix?
>>>
>>> Thanks.
>>>
>>> On Wed, Jan 31, 2018 at 10:03:51AM +, Bin Cheng wrote:
 Hi,
 This patch fixes invalid profile count information in vectorization 
 peeling.
 Current implementation is a bit confusing for me since it tries to compute
 an overall probability based on scaling probability and change of estimated
 niters.  This patch does it in two steps.  Firstly it does the scaling, 
 then
 it adjusts to new estimated niters by simply adjusting loop's latch count
 information; scaling the loop's count information by the proportion
 new_estimated_niters/old_estimate_niters.  Of course we have to adjust loop
 latch's count information back after scaling.

 Bootstrap and test on x86_64 and AArch64.  gcc.dg/vect/pr79347.c is fixed
 for both PR82965 and PR83991.  Is this OK?

 Thanks,
 bin

 2018-01-30  Bin Cheng  

   PR tree-optimization/82965
   PR tree-optimization/83991
   * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile
   information if the loop was predicted to iterate too many times.
>>>
 diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
 index b9b76d8..1f560b8 100644
 --- a/gcc/cfgloopmanip.c
 +++ b/gcc/cfgloopmanip.c
 @@ -509,7 +509,7 @@ scale_loop_profile (struct loop *loop, 
 profile_probability p,
   gcov_type iteration_bound)
  {
gcov_type iterations = expected_loop_iterations_unbounded (loop);
 -  edge e;
 +  edge e, preheader_e;
edge_iterator ei;

if (dump_file && (dump_flags & TDF_DETAILS))
 @@ -521,77 +521,66 @@ scale_loop_profile (struct loop *loop, 
 profile_probability p,
  (int)iteration_bound, (int)iterations);
  }

 +  /* Scale the probabilities.  */
 +  scale_loop_frequencies (loop, p);
 +
/* See if loop is predicted to iterate too many times.  */
 -  if (iteration_bound && iterations > 0
 -  && p.apply (iterations) > iteration_bound)
 +  if (iteration_bound == 0 || iterations <= 0
 +  || p.apply (iterations) <= iteration_bound)
 +return;
 +
 +  e = single_exit (loop);
 +  preheader_e = loop_preheader_edge (loop);
 +  profile_count count_in = preheader_e->count ();
 +  if (e && preheader_e
 +  && count_in > profile_count::zero ()
 +  && loop->header->count.initialized_p ())
  {
 -  /* Fixing loop profile for different trip count is not trivial; the 
 exit
 -  probabilities has to be updated to match and frequencies propagated 
 down
 -  to the loop body.
 -
 -  We fully update only the simple case of loop with single exit that 
 is
 -  either from the latch or BB just before latch and leads from BB with
 -  simple conditional jump.   This is OK for use in vectorizer.  */
 -  e = single_exit (loop);
 -  if (e)
 - {
 -   edge other_e;
 -   profile_count count_delta;
 +  edge other_e;
 +  profile_count count_delta;

 -  FOR_EACH_EDGE (other_e, ei, e->src->succs)
 - if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
 - && e != other_e)
 -   break;
 +  FOR_EACH_EDGE (other_e, ei, e->src->succs)
 + if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
 + && e != other_e)
 +   break;

 -   /* Probability of exit must be 1/iterations.  */
 -   count_delta = e->count ();
 -   e->probability = profile_probability::always ()
 +  /* Probability of exit must be 1/iterations.  */
 +  count_delta = e->count ();
 +  e->probability = profile_probability::always ()
   .apply_scale (1, iteration_bound);
 -   other_e->probability = e->probability.invert ();
 -   count_delta -= e->count ();
 -
 -   /* If latch exists, change its count, since we changed
 -  probability of exit.  Theoretically we should update everything 
 from
 -  

Re: [PR84682] disregard address constraints on non-addresses

2018-03-09 Thread Vladimir Makarov



On 03/09/2018 01:45 AM, Alexandre Oliva wrote:

LRA gets very confused when non-addresses are passed as operands to
asms with address contraints.  Even if other constraints are
available, and the operand is a perfect fit for them, we'd still
attempt to process the operand as an address, and fail miserably at
that.

Truth is, address constraints expect operands allowed by
address_operand, and we make sure this is the case throughout the
compiler, even in asm statements.  The problem was that, if multiple
constraints were available, we wouldn't insist that the operand be
allowed by address_operand, but we would proceed as if it was,
regardless of any other constraints.

To address this problem, I've arranged for LRA to attempt to deal with
address-constrained operands as addresses only when the is_address
flag is set, and to not set this flag in preprocess_constraints for
asm operands that are not allowed by address_operand.

Regstrapped on i686- and x86_64-linux-gnu.  Ok to install?

It looks ok for me.  Thank you, Alex.

for  gcc/ChangeLog

PR rtl-optimization/84682
* lra-constraints.c (process_address_1): Check is_address flag
for address constraints.
(process_alt_operands): Likewise.
* lra.c (lra_set_insn_recog_data): Pass asm operand locs to
preprocess_constraints.
* recog.h (preprocess_constraints): Add oploc parameter.
Adjust callers.
* recog.c (preprocess_constraints): Test address_operand for
CT_ADDRESS constraints.

for  gcc/testsuite/ChangeLog

PR rtl-optimization/84682
* gcc.dg/torture/pr84682-1.c: New.
* gcc.dg/torture/pr84682-2.c: New.
* gcc.dg/torture/pr84682-3.c: New.





Re: Seeking Release Manager approval for: [PATCH] jit: fix link on OS X and Solaris (PR jit/64089 and PR jit/84288)

2018-03-09 Thread Jakub Jelinek
On Fri, Mar 09, 2018 at 09:14:13AM -0500, David Malcolm wrote:
> On Wed, 2018-02-14 at 23:36 +0100, FX wrote:
> > I can confirm that, with the attached revised patch, a bootstrap with
> > --enable-languages=c,c++,jit --enable-host-shared is successful on
> > macOS.
> > 
> > FX
> 
> Looks good to me; thanks for fixing.
> 
> 
> Release managers:
> 
> I'd like to apply FX's patch here:
>   https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00881/patch
> to trunk, to fix the build of jit on OS X, and to make it easier to fix
> it on Solaris.
> 
> Is it OK for trunk now, or does this need to wait until next stage 1?

Ok for trunk now.

Jakub


Seeking Release Manager approval for: [PATCH] jit: fix link on OS X and Solaris (PR jit/64089 and PR jit/84288)

2018-03-09 Thread David Malcolm
On Wed, 2018-02-14 at 23:36 +0100, FX wrote:
> I can confirm that, with the attached revised patch, a bootstrap with
> --enable-languages=c,c++,jit --enable-host-shared is successful on
> macOS.
> 
> FX

Looks good to me; thanks for fixing.


Release managers:

I'd like to apply FX's patch here:
  https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00881/patch
to trunk, to fix the build of jit on OS X, and to make it easier to fix
it on Solaris.

This involves touching gcc/configure.ac (and configure).

I've successfully bootstrapped and regression-tested it on x86_64-pc-
linux-gnu.  FX reports above that it fixes the build on macOS, and
Rainer has an (untested) patch on top of it that ought to fix the build
on Solaris:
  https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00835.html

We're in stage 4, and the two bugs in question:
  PR jit/64089 ("libgccjit.so.0.0.1 linkage failure on darwin")
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64089

  PR jit/84288 ("Support jit on Solaris")
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84288
aren't regressions.

However, I believe this is a low-risk patch, and is mostly confined to
jit (and those targets).

Is it OK for trunk now, or does this need to wait until next stage 1?

Thanks
Dave


[PATCH] Fix PR84775

2018-03-09 Thread Richard Biener

The following reverts the fix for PR84178 again and instead prunes those
bogus immediate uses from stmts we didn't yet insert.  Micha has sth
in the works to clean up the mess somewhat in GCC 9.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2018-03-09  Richard Biener  

PR tree-optimization/84775
* tree-if-conv.c (add_bb_predicate_gimplified_stmts): Delink
immediate uses of predicate stmts and mark them modified.

Revert
PR tree-optimization/84178
* tree-if-conv.c (combine_blocks): Move insert_gimplified_predicates
to caller.
(version_loop_for_if_conversion): Delay update_ssa call.
(tree_if_conversion): Delay update_ssa until after predicate
insertion.

* gcc.dg/torture/pr84775.c: New testcase.

Index: gcc/testsuite/gcc.dg/torture/pr84775.c
===
--- gcc/testsuite/gcc.dg/torture/pr84775.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr84775.c  (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+int a, b, c, d, e, f;
+
+void fn1 ()
+{
+  while (e)
+for (f = 0; f < 4; f++)
+  {
+   int h[1];
+   for (; c; c++)
+ if (a)
+   break;
+   d = a - 4;
+   if (d)
+ b = h[0];
+  }
+}
Index: gcc/tree-if-conv.c
===
--- gcc/tree-if-conv.c  (revision 258380)
+++ gcc/tree-if-conv.c  (working copy)
@@ -257,6 +257,19 @@ set_bb_predicate_gimplified_stmts (basic
 static inline void
 add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
 {
+  /* We might have updated some stmts in STMTS via force_gimple_operand
+ calling fold_stmt and that producing multiple stmts.  Delink immediate
+ uses so update_ssa after loop versioning doesn't get confused for
+ the not yet inserted predicates.
+ ???  This should go away once we reliably avoid updating stmts
+ not in any BB.  */
+  for (gimple_stmt_iterator gsi = gsi_start (stmts);
+   !gsi_end_p (gsi); gsi_next ())
+{
+  gimple *stmt = gsi_stmt (gsi);
+  delink_stmt_imm_use (stmt);
+  gimple_set_modified (stmt, true);
+}
   gimple_seq_add_seq_without_update
 (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
 }
@@ -2371,6 +2384,7 @@ combine_blocks (struct loop *loop)
   edge_iterator ei;
 
   remove_conditions_and_labels (loop);
+  insert_gimplified_predicates (loop);
   predicate_all_scalar_phis (loop);
 
   if (any_pred_load_store)
@@ -2592,6 +2606,7 @@ version_loop_for_if_conversion (struct l
   gsi = gsi_last_bb (cond_bb);
   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
   gsi_insert_before (, g, GSI_SAME_STMT);
+  update_ssa (TODO_update_ssa);
   return new_loop;
 }
 
@@ -2809,7 +2824,6 @@ tree_if_conversion (struct loop *loop)
   unsigned int todo = 0;
   bool aggressive_if_conv;
   struct loop *rloop;
-  bool need_update_ssa = false;
 
  again:
   rloop = NULL;
@@ -2855,7 +2869,6 @@ tree_if_conversion (struct loop *loop)
   struct loop *nloop = version_loop_for_if_conversion (vloop);
   if (nloop == NULL)
goto cleanup;
-  need_update_ssa = true;
   if (vloop != loop)
{
  /* If versionable_outer_loop_p decided to version the
@@ -2880,13 +2893,6 @@ tree_if_conversion (struct loop *loop)
}
 }
 
-  /* Due to hard to fix issues we end up with immediate uses recorded
- for not yet inserted predicates which will confuse SSA update so
- we delayed this from after versioning to after predicate insertion.  */
-  insert_gimplified_predicates (loop);
-  if (need_update_ssa)
-update_ssa (TODO_update_ssa);
-
   /* Now all statements are if-convertible.  Combine all the basic
  blocks into one huge basic block doing the if-conversion
  on-the-fly.  */


[PATCH] Fix PR84777

2018-03-09 Thread Richard Biener

With -Os we inhibit most loop header copying which in turn will disable
any vectorization attempt either via -fopenmp or via -ftree-vectorize.

The following makes sure the aggressive gate in 
should_duplicate_loop_header_p is not applied for force-vectorize loops.
There's still PARAM_MAX_LOOP_HEADER_INSNS limiting growth.

This reportedly makes -fopenmp -Os vectorize loops properly.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Is this OK for trunk (and branches?) or do we want sth different
in the end?

Thanks,
Richard.

2018-03-09  Richard Biener  

PR tree-optimization/84777
* tree-ssa-loop-ch.c (should_duplicate_loop_header_p): For
force-vectorize loops ignore whether we are optimizing for size.

Index: gcc/tree-ssa-loop-ch.c
===
--- gcc/tree-ssa-loop-ch.c  (revision 258380)
+++ gcc/tree-ssa-loop-ch.c  (working copy)
@@ -57,7 +57,8 @@ should_duplicate_loop_header_p (basic_bl
  be true, since quite often it is possible to verify that the condition is
  satisfied in the first iteration and therefore to eliminate it.  Jump
  threading handles these cases now.  */
-  if (optimize_loop_for_size_p (loop))
+  if (optimize_loop_for_size_p (loop)
+  && !loop->force_vectorize)
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,


[C++ PATCH] Don't redefine __* builtins (PR c++/84724, take 2)

2018-03-09 Thread Jakub Jelinek
On Thu, Mar 08, 2018 at 04:06:24PM -0500, Jason Merrill wrote:
> On Thu, Mar 8, 2018 at 1:01 PM, Jakub Jelinek  wrote:
> > The C FE just warns and doesn't override builtins, but C++ FE
> > on say int __builtin_trap (); will override the builtin, so later
> > builtin_decl_explicit will return the bogus user function which e.g.
> > doesn't have any merged attributes, can have different arguments or
> > return type etc.
> >
> > This patch prevents that; generally the builtins we want to override
> > are the DECL_ANTICIPATED which we handle properly earlier.
> >
> > The testcase in the PR ICEs not because of the argument mismatch (there is
> > none in this case) or return value mismatch (because nothing cares about the
> > return value), but because the new decl lacks noreturn attribute and GCC
> > relies on builtin_decl_explicit (BUILT_IN_TRAP) to be really noreturn.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> > Or shall we error on these, or ignore the name checks and just
> > if (DECL_BUILT_IN (olddecl)) return NULL_TREE;, something else?
> 
> Seems like returning NULL_TREE means that we overload the built-in,
> which also seems undesirable; what if we return olddecl unmodified?
> It would also be good to improve the diagnostic to say that we're
> ignoring the declaration.  Perhaps as a permerror.

So like this (if it passes bootstrap/regtest, though I think we don't have
any tests other than these new ones that cover it anyway, so I don't expect
issues)?

2018-03-09  Jakub Jelinek  

PR c++/84724
* decl.c (duplicate_decls): Don't override __* prefixed builtins
except for __[^b]*_chk, instead issue permerror and for -fpermissive
also a note and return olddecl.

* g++.dg/ext/pr84724.C: New test.

--- gcc/cp/decl.c.jj2018-03-08 21:53:44.989559552 +0100
+++ gcc/cp/decl.c   2018-03-09 11:53:58.557156637 +0100
@@ -1566,6 +1566,33 @@ next_arg:;
   || compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
 TYPE_ARG_TYPES (TREE_TYPE (olddecl
{
+ /* Don't really override olddecl for __* prefixed builtins
+except for __[^b]*_chk, the compiler might be using those
+explicitly.  */
+ if (DECL_BUILT_IN (olddecl))
+   {
+ tree id = DECL_NAME (olddecl);
+ const char *name = IDENTIFIER_POINTER (id);
+ size_t len;
+
+ if (name[0] == '_'
+ && name[1] == '_'
+ && (strncmp (name + 2, "builtin_",
+  strlen ("builtin_")) == 0
+ || (len = strlen (name)) <= strlen ("___chk")
+ || memcmp (name + len - strlen ("_chk"),
+"_chk", strlen ("_chk") + 1) != 0))
+   {
+ if (permerror (DECL_SOURCE_LOCATION (newdecl),
+"new declaration %q#D ambiguates built-in"
+" declaration %q#D", newdecl, olddecl)
+ && flag_permissive)
+   inform (DECL_SOURCE_LOCATION (newdecl),
+   "ignoring the %q#D declaration", newdecl);
+ return olddecl;
+   }
+   }
+
  /* A near match; override the builtin.  */
 
  if (TREE_PUBLIC (newdecl))
--- gcc/testsuite/g++.dg/ext/pr84724-1.C.jj 2018-03-09 11:44:49.037993207 
+0100
+++ gcc/testsuite/g++.dg/ext/pr84724-1.C2018-03-09 11:57:11.599204801 
+0100
@@ -0,0 +1,14 @@
+// PR c++/84724
+// { dg-do compile }
+// { dg-options "-O3 -fpermissive" }
+
+int __builtin_trap (); // { dg-warning "ambiguates built-in 
declaration" }
+   // { dg-message "ignoring the 'int 
__builtin_trap\\(\\)' declaration" "" { target *-*-* } .-1 }
+
+int
+foo ()
+{
+  int b;
+  int c ();  // { dg-warning "invalid conversion from" }
+  return b %= b ? c : 0;
+}
--- gcc/testsuite/g++.dg/ext/pr84724-2.C.jj 2018-03-09 11:57:26.017208398 
+0100
+++ gcc/testsuite/g++.dg/ext/pr84724-2.C2018-03-09 11:57:40.775212078 
+0100
@@ -0,0 +1,14 @@
+// PR c++/84724
+// { dg-do compile }
+// { dg-options "-O3 -fpermissive -w" }
+
+int __builtin_trap (); // { dg-bogus "ambiguates built-in declaration" 
}
+   // { dg-bogus "ignoring the 'int 
__builtin_trap\\(\\)' declaration" "" { target *-*-* } .-1 }
+
+int
+foo ()
+{
+  int b;
+  int c ();  // { dg-bogus "invalid conversion from" }
+  return b %= b ? c : 0;
+}
--- gcc/testsuite/g++.dg/ext/pr84724-3.C.jj 2018-03-09 11:57:50.000214382 
+0100
+++ gcc/testsuite/g++.dg/ext/pr84724-3.C2018-03-09 11:58:10.797219571 
+0100
@@ -0,0 +1,5 @@
+// PR c++/84724
+// { dg-do compile }
+// 

[PR c++/84733] ICE in check-local-shadow

2018-03-09 Thread Nathan Sidwell
This patch fixes the ICE in check-local-shadow, but reverts behaviour to 
the earlier ICE in pop_local_binding.


Check local shadow had a check when finding an outer local scope for 
function_parm_scope.  But that code didn't make sense and was 
unreachable with the current testsuite.  So I removed it.  However, this 
testcase needed it.


It needed it because do_pushdecl_with_scope clears 
current_function_decl, even if the scope being pushed into happens to be 
the same function.  And in all uses cases, we're either pushing into a 
namespace scope (so it should be cleared) or pushig into a class scope 
(so it should be unchanged).


The code in pushtag that does this looks funky.  I don;t think we do the 
same when pushing non-tags, and that's weird.  Probably related to the 
older ICE in pop_local_binding.  That happens because we're popping the 
VAR_DECL for 'e' but find the TYPE_DECL -- somethings getting the 
ordering wrong.  I was unable to find a testcase that triggered the new 
ice without also triggering the old ice.


I'm leaving the bug open, but with a simpler testcase. At least the 
regression is fixed.


nathan

--
Nathan Sidwell
2018-03-09  Nathan Sidwell  

	PR c++/84733
	* name-lookup.c (do_pushdecl_with_scope): Only clear
	current_function_decl when pushing a non-class (i.e. namespace)
	scope.

Index: cp/name-lookup.c
===
--- cp/name-lookup.c	(revision 258338)
+++ cp/name-lookup.c	(working copy)
@@ -3965,9 +3965,7 @@ static tree
 do_pushdecl_with_scope (tree x, cp_binding_level *level, bool is_friend)
 {
   cp_binding_level *b;
-  tree function_decl = current_function_decl;
 
-  current_function_decl = NULL_TREE;
   if (level->kind == sk_class)
 {
   b = class_binding_level;
@@ -3977,12 +3975,15 @@ do_pushdecl_with_scope (tree x, cp_bindi
 }
   else
 {
+  tree function_decl = current_function_decl;
+  if (level->kind == sk_namespace)
+	current_function_decl = NULL_TREE;
   b = current_binding_level;
   current_binding_level = level;
   x = pushdecl (x, is_friend);
   current_binding_level = b;
+  current_function_decl = function_decl;
 }
-  current_function_decl = function_decl;
   return x;
 }
 


Re: [SFN+LVU+IEPM v4 9/9] [IEPM] Introduce inline entry point markers

2018-03-09 Thread Jakub Jelinek
On Fri, Mar 09, 2018 at 09:48:45AM +, Bin.Cheng wrote:
> On Wed, Feb 28, 2018 at 6:17 AM, Alexandre Oliva  wrote:
> > On Feb 21, 2018, Alexandre Oliva  wrote:
> >
> >> On Feb 15, 2018, Szabolcs Nagy  wrote:
> >>> i see assembler slow downs with these location view patches
> >>> i opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84408
> >
> >
> >> [LVU] reset view at function entry, omit views at line zero
> >
> > Ping?  https://gcc.gnu.org/ml/gcc-patches/2018-02/msg01224.html
> 
> Hi,
> The new test case failed on aarch64-none-elf, aarch64_be-none-elf and
> arm-none-eabi ,
> which are all bare-metal toolchains with below message:
> 
> xgcc: error: unrecognized command line option '-pthread'
> 
> I assume pthread is unavailable on such targets if it's required.

I think this ought to fix it, committed as obvious:

2018-03-09  Jakub Jelinek  

PR debug/84404
* gcc.dg/graphite/pr84404.c: Only compile on pthread effective
targets.

--- gcc/testsuite/gcc.dg/graphite/pr84404.c.jj  2018-03-08 21:53:46.096560261 
+0100
+++ gcc/testsuite/gcc.dg/graphite/pr84404.c 2018-03-09 10:54:49.605773196 
+0100
@@ -1,4 +1,5 @@
-/* { dg-do compile } */
+/* PR debug/84404 */
+/* { dg-do compile { target pthread } } */
 /* { dg-options "-O2 -ftree-parallelize-loops=2 -floop-nest-optimize -g" } */
 
 int te[9];


Jakub


Re: Patch ping (Re: [PATCH PR82965/PR83991]Fix invalid profile count in vectorization peeling)

2018-03-09 Thread Paul Hua
It's looks fixed
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82965#c12  on mips64el.

Thanks.

On Mon, Feb 26, 2018 at 8:02 PM, Bin.Cheng  wrote:
> Ping^2
>
> Thanks,
> bin
>
> On Mon, Feb 19, 2018 at 5:14 PM, Jakub Jelinek  wrote:
>> Hi!
>>
>> Honza, do you think you could have a look at this P1 fix?
>>
>> Thanks.
>>
>> On Wed, Jan 31, 2018 at 10:03:51AM +, Bin Cheng wrote:
>>> Hi,
>>> This patch fixes invalid profile count information in vectorization peeling.
>>> Current implementation is a bit confusing for me since it tries to compute
>>> an overall probability based on scaling probability and change of estimated
>>> niters.  This patch does it in two steps.  Firstly it does the scaling, then
>>> it adjusts to new estimated niters by simply adjusting loop's latch count
>>> information; scaling the loop's count information by the proportion
>>> new_estimated_niters/old_estimate_niters.  Of course we have to adjust loop
>>> latch's count information back after scaling.
>>>
>>> Bootstrap and test on x86_64 and AArch64.  gcc.dg/vect/pr79347.c is fixed
>>> for both PR82965 and PR83991.  Is this OK?
>>>
>>> Thanks,
>>> bin
>>>
>>> 2018-01-30  Bin Cheng  
>>>
>>>   PR tree-optimization/82965
>>>   PR tree-optimization/83991
>>>   * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile
>>>   information if the loop was predicted to iterate too many times.
>>
>>> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
>>> index b9b76d8..1f560b8 100644
>>> --- a/gcc/cfgloopmanip.c
>>> +++ b/gcc/cfgloopmanip.c
>>> @@ -509,7 +509,7 @@ scale_loop_profile (struct loop *loop, 
>>> profile_probability p,
>>>   gcov_type iteration_bound)
>>>  {
>>>gcov_type iterations = expected_loop_iterations_unbounded (loop);
>>> -  edge e;
>>> +  edge e, preheader_e;
>>>edge_iterator ei;
>>>
>>>if (dump_file && (dump_flags & TDF_DETAILS))
>>> @@ -521,77 +521,66 @@ scale_loop_profile (struct loop *loop, 
>>> profile_probability p,
>>>  (int)iteration_bound, (int)iterations);
>>>  }
>>>
>>> +  /* Scale the probabilities.  */
>>> +  scale_loop_frequencies (loop, p);
>>> +
>>>/* See if loop is predicted to iterate too many times.  */
>>> -  if (iteration_bound && iterations > 0
>>> -  && p.apply (iterations) > iteration_bound)
>>> +  if (iteration_bound == 0 || iterations <= 0
>>> +  || p.apply (iterations) <= iteration_bound)
>>> +return;
>>> +
>>> +  e = single_exit (loop);
>>> +  preheader_e = loop_preheader_edge (loop);
>>> +  profile_count count_in = preheader_e->count ();
>>> +  if (e && preheader_e
>>> +  && count_in > profile_count::zero ()
>>> +  && loop->header->count.initialized_p ())
>>>  {
>>> -  /* Fixing loop profile for different trip count is not trivial; the 
>>> exit
>>> -  probabilities has to be updated to match and frequencies propagated 
>>> down
>>> -  to the loop body.
>>> -
>>> -  We fully update only the simple case of loop with single exit that is
>>> -  either from the latch or BB just before latch and leads from BB with
>>> -  simple conditional jump.   This is OK for use in vectorizer.  */
>>> -  e = single_exit (loop);
>>> -  if (e)
>>> - {
>>> -   edge other_e;
>>> -   profile_count count_delta;
>>> +  edge other_e;
>>> +  profile_count count_delta;
>>>
>>> -  FOR_EACH_EDGE (other_e, ei, e->src->succs)
>>> - if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
>>> - && e != other_e)
>>> -   break;
>>> +  FOR_EACH_EDGE (other_e, ei, e->src->succs)
>>> + if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
>>> + && e != other_e)
>>> +   break;
>>>
>>> -   /* Probability of exit must be 1/iterations.  */
>>> -   count_delta = e->count ();
>>> -   e->probability = profile_probability::always ()
>>> +  /* Probability of exit must be 1/iterations.  */
>>> +  count_delta = e->count ();
>>> +  e->probability = profile_probability::always ()
>>>   .apply_scale (1, iteration_bound);
>>> -   other_e->probability = e->probability.invert ();
>>> -   count_delta -= e->count ();
>>> -
>>> -   /* If latch exists, change its count, since we changed
>>> -  probability of exit.  Theoretically we should update everything 
>>> from
>>> -  source of exit edge to latch, but for vectorizer this is enough. 
>>>  */
>>> -   if (loop->latch
>>> -   && loop->latch != e->src)
>>> - {
>>> -   loop->latch->count += count_delta;
>>> - }
>>> - }
>>> +  other_e->probability = e->probability.invert ();
>>>
>>>/* Roughly speaking we want to reduce the loop body profile by the
>>>difference of loop iterations.  We however can do better if
>>>we look at the actual profile, if it is available.  */
>>> -  p = p.apply_scale 

Re: [SFN+LVU+IEPM v4 9/9] [IEPM] Introduce inline entry point markers

2018-03-09 Thread Ramana Radhakrishnan
On Fri, Mar 9, 2018 at 9:48 AM, Bin.Cheng  wrote:
> On Wed, Feb 28, 2018 at 6:17 AM, Alexandre Oliva  wrote:
>> On Feb 21, 2018, Alexandre Oliva  wrote:
>>
>>> On Feb 15, 2018, Szabolcs Nagy  wrote:
 i see assembler slow downs with these location view patches
 i opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84408
>>
>>
>>> [LVU] reset view at function entry, omit views at line zero
>>
>> Ping?  https://gcc.gnu.org/ml/gcc-patches/2018-02/msg01224.html
>
> Hi,
> The new test case failed on aarch64-none-elf, aarch64_be-none-elf and
> arm-none-eabi ,
> which are all bare-metal toolchains with below message:
>
> xgcc: error: unrecognized command line option '-pthread'
>
> I assume pthread is unavailable on such targets if it's required.

I think there's a dg-effective-target for pthread.

Ramana

>
> Thanks,
> bin
>>
>>> for  gcc/ChangeLog
>>
>>>   PR debug/84404
>>>   PR debug/84408
>>>   * dwarf2out.c (struct dw_line_info_table): Update comments for
>>>   view == -1.
>>>   (FORCE_RESET_NEXT_VIEW): New.
>>>   (FORCE_RESETTING_VIEW_P): New.
>>>   (RESETTING_VIEW_P): Check for -1 too.
>>>   (ZERO_VIEW_P): Likewise.
>>>   (new_line_info_table): Force-reset next view.
>>>   (dwarf2out_begin_function): Likewise.
>>>   (dwarf2out_source_line): Simplify zero_view_p initialization.
>>>   Test FORCE_RESETTING_VIEW_P and RESETTING_VIEW_P instead of
>>>   view directly.  Omit view when omitting .loc at line 0.
>>
>>> for  gcc/testsuite/ChangeLog
>>
>>>   PR debug/84404
>>>   PR debug/84408
>>>   * gcc.dg/graphite/pr84404.c: New.
>>
>> --
>> Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
>> You must be the change you wish to see in the world. -- Gandhi
>> Be Free! -- http://FSFLA.org/   FSF Latin America board member
>> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [SFN+LVU+IEPM v4 9/9] [IEPM] Introduce inline entry point markers

2018-03-09 Thread Bin.Cheng
On Wed, Feb 28, 2018 at 6:17 AM, Alexandre Oliva  wrote:
> On Feb 21, 2018, Alexandre Oliva  wrote:
>
>> On Feb 15, 2018, Szabolcs Nagy  wrote:
>>> i see assembler slow downs with these location view patches
>>> i opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84408
>
>
>> [LVU] reset view at function entry, omit views at line zero
>
> Ping?  https://gcc.gnu.org/ml/gcc-patches/2018-02/msg01224.html

Hi,
The new test case failed on aarch64-none-elf, aarch64_be-none-elf and
arm-none-eabi ,
which are all bare-metal toolchains with below message:

xgcc: error: unrecognized command line option '-pthread'

I assume pthread is unavailable on such targets if it's required.

Thanks,
bin
>
>> for  gcc/ChangeLog
>
>>   PR debug/84404
>>   PR debug/84408
>>   * dwarf2out.c (struct dw_line_info_table): Update comments for
>>   view == -1.
>>   (FORCE_RESET_NEXT_VIEW): New.
>>   (FORCE_RESETTING_VIEW_P): New.
>>   (RESETTING_VIEW_P): Check for -1 too.
>>   (ZERO_VIEW_P): Likewise.
>>   (new_line_info_table): Force-reset next view.
>>   (dwarf2out_begin_function): Likewise.
>>   (dwarf2out_source_line): Simplify zero_view_p initialization.
>>   Test FORCE_RESETTING_VIEW_P and RESETTING_VIEW_P instead of
>>   view directly.  Omit view when omitting .loc at line 0.
>
>> for  gcc/testsuite/ChangeLog
>
>>   PR debug/84404
>>   PR debug/84408
>>   * gcc.dg/graphite/pr84404.c: New.
>
> --
> Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [PR84620] output symbolic entry_view as data2, not addr

2018-03-09 Thread Jakub Jelinek
On Fri, Mar 09, 2018 at 05:49:34AM -0300, Alexandre Oliva wrote:
> @@ -27699,6 +27780,9 @@ dwarf2out_source_line (unsigned int line, unsigned 
> int column,
>   {
> if (!RESETTING_VIEW_P (table->view))
>   {
> +   table->symviews_since_reset++;
> +   if (table->symviews_since_reset > symview_upper_bound)
> + symview_upper_bound = table->symviews_since_reset;

You could have used
  symview_upper_bound
= MAX (symview_upper_bound, table->symviews_since_reset);

Also, I'd say there should be
  symview_upper_bound = 0;
in dwarf2out_c_finalize for better behavior of GCC JIT.

Otherwise LGTM, but please give Jason a day to comment on.

Jakub


Re: [C++ Patch] PR 71169 ("[7/8 Regression] ICE on invalid C++ code in pop_nested_class"), PR 71832 and more (Take 2)

2018-03-09 Thread Paolo Carlini
.. after a few hours of sleep, I realize that in principle we could 
also call any_erroneous_template_args_p directly. In that case however, 
I believe get_template_info does nothing 100% of the case, not sure we 
want to do that.


Paolo.


Fix PR target/84763

2018-03-09 Thread Eric Botcazou
This is a small regression just introduced on the mainline by my patch fixing 
the -freorder-blocks-and-partition optimization on x86-64/Windows.  On this 
platform, the SEH scheme imposes constraints on the prologue of functions, in 
particular the frame pointer is near the bottom of the frame.  But there is an 
exception when __builtin_{frame,return}_address is present in the code and the 
implementation of i386_pe_seh_cold_init fails to account for it.

Tested on x86-64/Windows, applied on the mainline as obvious.


2018-03-09  Eric Botcazou  <ebotca...@adacore.com>

PR target/84763
* config/i386/winnt.c (i386_pe_seh_cold_init): Use small pre-allocation
when the function accesses prior frames.


2018-03-09  Eric Botcazou  <ebotca...@adacore.com>

* gcc.c-torture/compile/20180309-1.c: New test.

-- 
Eric BotcazouIndex: config/i386/winnt.c
===
--- config/i386/winnt.c	(revision 258338)
+++ config/i386/winnt.c	(working copy)
@@ -879,7 +879,7 @@ void
 i386_pe_seh_cold_init (FILE *f, const char *name)
 {
   struct seh_frame_state *seh;
-  HOST_WIDE_INT offset;
+  HOST_WIDE_INT alloc_offset, offset;
 
   if (!TARGET_SEH)
 return;
@@ -891,7 +891,16 @@ i386_pe_seh_cold_init (FILE *f, const ch
   assemble_name (f, name);
   fputc ('\n', f);
 
-  offset = seh->sp_offset - INCOMING_FRAME_SP_OFFSET;
+  /* In the normal case, the frame pointer is near the bottom of the frame
+ so we can do the full stack allocation and set it afterwards.  There
+ is an exception when the function accesses prior frames so, in this
+ case, we need to pre-allocate a small chunk before setting it.  */
+  if (crtl->accesses_prior_frames)
+alloc_offset = seh->cfa_offset;
+  else
+alloc_offset = seh->sp_offset;
+
+  offset = alloc_offset - INCOMING_FRAME_SP_OFFSET;
   if (offset > 0 && offset < SEH_MAX_FRAME_SIZE)
 fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
 
@@ -903,12 +912,12 @@ i386_pe_seh_cold_init (FILE *f, const ch
 		 : (gcc_unreachable (), "")), f);
 	print_reg (gen_rtx_REG (DImode, regno), 0, f);
 	fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n",
-		 seh->sp_offset - seh->reg_offset[regno]);
+		 alloc_offset - seh->reg_offset[regno]);
   }
 
   if (seh->cfa_reg != stack_pointer_rtx)
 {
-  offset = seh->sp_offset - seh->cfa_offset;
+  offset = alloc_offset - seh->cfa_offset;
 
   gcc_assert ((offset & 15) == 0);
   gcc_assert (IN_RANGE (offset, 0, 240));
@@ -918,6 +927,13 @@ i386_pe_seh_cold_init (FILE *f, const ch
   fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
 }
 
+  if (crtl->accesses_prior_frames)
+{
+  offset = seh->sp_offset - alloc_offset;
+  if (offset > 0 && offset < SEH_MAX_FRAME_SIZE)
+	fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
+}
+
   fputs ("\t.seh_endprologue\n", f);
 }
 
/* PR target/84763 */
/* { dg-require-effective-target return_address } */

extern void abort (void);

void *foo (unsigned int *data, unsigned int len)
{
  unsigned int local_data[128];

  if (len > 128)
abort ();

  for (unsigned int i = 0; i < len; i++)
local_data[i] = data[i] + data[len - 1 - i] * 2;

  void *ret = __builtin_frame_address (0);

  for (unsigned int i = 0; i < len; i++)
ret = ret + local_data[i] % 8;

  return ret;
}


Re: [PR84620] output symbolic entry_view as data2, not addr

2018-03-09 Thread Alexandre Oliva
On Mar  8, 2018, Jakub Jelinek  wrote:

> An upper bound would be fine, count any potential spot where you introduce a
> view and if the upper bound fits into 8-bit value, use data1, if into 16-bit
> value, use data2, otherwise use data4.

[IEPM] [PR debug/84620] use constant form for DW_AT_GNU_entry_view

When outputting entry views in symbolic mode, we used to use a lbl_id,
but that outputs the view as an addr, perhaps even in an indirect one,
which is all excessive and undesirable for a small assembler-computed
constant.

Introduce a new value class for symbolic views, so that we can output
the labels as constant data, using as narrow forms as possible, but
wide enough for any symbolic views output in the compilation.  We
don't know exactly where the assembler will reset views, but we count
the symbolic views since known reset points and use that as an upper
bound for view numbers.

Ideally, we'd use uleb128, but then the compiler would have to defer
.debug_info offset computation to the assembler.  I'm not going there
for now, so a symbolic uleb128 assembler constant in an attribute is
not something GCC can deal with ATM.

Regstrapped on i686- and x86_64-linux-gnu, with binutils 2.30.  Through
visual inspection, I've confirmed there are consistent uses of data1 and
data2 for DW_AT_GNU_entry_view in libgcc and libstdc++; wider const
forms were not needed.  Ok to install?

for  gcc/ChangeLog

PR debug/84620
* dwarf2out.h (dw_val_class): Add dw_val_class_symview.
(dw_val_node): Add val_symbolic_view.
* dwarf2out.c (dw_line_info_table): Add symviews_since_reset.
(symview_upper_bound): New.
(new_line_info_table): Initialize symviews_since_reset.
(dwarf2out_source_line): Count symviews_since_reset and set
symview_upper_bound.
(dw_val_equal_p): Handle symview.
(add_AT_symview): New.
(print_dw_val): Handle symview.
(attr_checksum, attr_checksum_ordered): Likewise.
(same_dw_val_p, size_of_die): Likewise.
(value_format, output_die): Likewise.
(add_high_low_attributes): Use add_AT_symview for entry_view.
---
 gcc/dwarf2out.c |   97 ---
 gcc/dwarf2out.h |4 ++
 2 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 0348f4460e98..fbf87378ac79 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1434,6 +1434,8 @@ dw_val_equal_p (dw_val_node *a, dw_val_node *b)
   return a->v.val_die_ref.die == b->v.val_die_ref.die;
 case dw_val_class_fde_ref:
   return a->v.val_fde_index == b->v.val_fde_index;
+case dw_val_class_symview:
+  return strcmp (a->v.val_symbolic_view, b->v.val_symbolic_view) == 0;
 case dw_val_class_lbl_id:
 case dw_val_class_lineptr:
 case dw_val_class_macptr:
@@ -2951,6 +2953,11 @@ struct GTY(()) dw_line_info_table {
  going to ask the assembler to assign.  */
   var_loc_view view;
 
+  /* This counts the number of symbolic views emitted in this table
+ since the latest view reset.  Its max value, over all tables,
+ sets symview_upper_bound.  */
+  var_loc_view symviews_since_reset;
+
 #define FORCE_RESET_NEXT_VIEW(x) ((x) = (var_loc_view)-1)
 #define RESET_NEXT_VIEW(x) ((x) = (var_loc_view)0)
 #define FORCE_RESETTING_VIEW_P(x) ((x) == (var_loc_view)-1)
@@ -2959,6 +2966,13 @@ struct GTY(()) dw_line_info_table {
   vec *entries;
 };
 
+/* This is an upper bound for view numbers that the assembler may
+   assign to symbolic views output in this translation.  It is used to
+   decide how big a field to use to represent view numbers in
+   symview-classed attributes.  */
+
+static var_loc_view symview_upper_bound;
+
 /* If we're keep track of location views and their reset points, and
INSN is a reset point (i.e., it necessarily advances the PC), mark
the next view in TABLE as reset.  */
@@ -3603,6 +3617,7 @@ static addr_table_entry *add_addr_table_entry (void *, 
enum ate_kind);
 static void remove_addr_table_entry (addr_table_entry *);
 static void add_AT_addr (dw_die_ref, enum dwarf_attribute, rtx, bool);
 static inline rtx AT_addr (dw_attr_node *);
+static void add_AT_symview (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_lbl_id (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_lineptr (dw_die_ref, enum dwarf_attribute, const char *);
 static void add_AT_macptr (dw_die_ref, enum dwarf_attribute, const char *);
@@ -5117,6 +5132,21 @@ add_AT_vms_delta (dw_die_ref die, enum dwarf_attribute 
attr_kind,
   add_dwarf_attr (die, );
 }
 
+/* Add a symbolic view identifier attribute value to a DIE.  */
+
+static inline void
+add_AT_symview (dw_die_ref die, enum dwarf_attribute attr_kind,
+   const char *view_label)
+{
+  dw_attr_node attr;
+
+  attr.dw_attr = attr_kind;
+  attr.dw_attr_val.val_class = dw_val_class_symview;
+