Update profile in gimple-ssa-isolate-paths.c

2017-06-16 Thread Jan Hubicka
Hi,
while separating an impossible path, we should make sure it is seen as cold
by the profile.

Bootstrapped/regtested x86_64-linux, will commit it later today.

Honza

* gimple-ssa-isolate-paths.c (isolate_path): Set edge leading to path
as unlikely; update profile.

Index: gimple-ssa-isolate-paths.c
===
--- gimple-ssa-isolate-paths.c  (revision 249244)
+++ gimple-ssa-isolate-paths.c  (working copy)
@@ -137,6 +137,15 @@ isolate_path (basic_block bb, basic_bloc
   gimple_stmt_iterator si, si2;
   edge_iterator ei;
   edge e2;
+  bool impossible = true;
+
+  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
+if (stmt_can_terminate_bb_p (gsi_stmt (si)))
+  {
+   impossible = false;
+   break;
+  }
+  force_edge_cold (e, impossible);
 
   /* First duplicate BB if we have not done so already and remove all
  the duplicate's outgoing edges as duplicate is going to unconditionally
@@ -145,10 +154,14 @@ isolate_path (basic_block bb, basic_bloc
   if (!duplicate)
 {
   duplicate = duplicate_block (bb, NULL, NULL);
+  bb->frequency = 0;
+  bb->count = profile_count::zero ();
   if (!ret_zero)
for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
  remove_edge (e2);
 }
+  bb->frequency += EDGE_FREQUENCY (e);
+  bb->count += e->count;
 
   /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
   e2 = redirect_edge_and_branch (e, duplicate);


[C++ PATCH] tsubst_baselink formatting

2017-06-16 Thread Nathan Sidwell
Before diving into pr15272 this patch cleans up tsubst_baselink's wonky 
formatting, and adds a bit of clarity.


applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	* pt.c (tsubst_baselink): Fix & clarify formatting.

Index: pt.c
===
--- pt.c	(revision 249272)
+++ pt.c	(working copy)
@@ -14215,93 +14215,89 @@ tsubst (tree t, tree args, tsubst_flags_
 }
 }
 
-/* Like tsubst_expr for a BASELINK.  OBJECT_TYPE, if non-NULL, is the
-   type of the expression on the left-hand side of the "." or "->"
-   operator.  */
+/* tsubst a BASELINK.  OBJECT_TYPE, if non-NULL, is the type of the
+   expression on the left-hand side of the "." or "->" operator.  A
+   baselink indicates a function from a base class.  Both the
+   BASELINK_ACCESS_BINFO and the base class referenced may indicate
+   bases of the template class, rather than the instantiated class.
+   In addition, lookups that were not ambiguous before may be
+   ambiguous now.  Therefore, we perform the lookup again.  */
 
 static tree
 tsubst_baselink (tree baselink, tree object_type,
 		 tree args, tsubst_flags_t complain, tree in_decl)
 {
-tree name;
-tree qualifying_scope;
-tree fns;
-tree optype;
-tree template_args = 0;
-bool template_id_p = false;
-bool qualified = BASELINK_QUALIFIED_P (baselink);
-
-/* A baselink indicates a function from a base class.  Both the
-   BASELINK_ACCESS_BINFO and the base class referenced may
-   indicate bases of the template class, rather than the
-   instantiated class.  In addition, lookups that were not
-   ambiguous before may be ambiguous now.  Therefore, we perform
-   the lookup again.  */
-qualifying_scope = BINFO_TYPE (BASELINK_ACCESS_BINFO (baselink));
-qualifying_scope = tsubst (qualifying_scope, args,
-			   complain, in_decl);
-fns = BASELINK_FUNCTIONS (baselink);
-optype = tsubst (BASELINK_OPTYPE (baselink), args, complain, in_decl);
-if (TREE_CODE (fns) == TEMPLATE_ID_EXPR)
-  {
-	template_id_p = true;
-	template_args = TREE_OPERAND (fns, 1);
-	fns = TREE_OPERAND (fns, 0);
-	if (template_args)
-	  template_args = tsubst_template_args (template_args, args,
-		complain, in_decl);
-  }
-name = OVL_NAME (fns);
-if (IDENTIFIER_TYPENAME_P (name))
-  name = mangle_conv_op_name_for_type (optype);
-baselink = lookup_fnfields (qualifying_scope, name, /*protect=*/1);
-if (!baselink)
-  {
-	if (constructor_name_p (name, qualifying_scope))
-	  {
-	if (complain & tf_error)
-	  error ("cannot call constructor %<%T::%D%> directly",
-		 qualifying_scope, name);
-	  }
-	return error_mark_node;
-  }
-
-/* If lookup found a single function, mark it as used at this
-   point.  (If it lookup found multiple functions the one selected
-   later by overload resolution will be marked as used at that
-   point.)  */
-if (BASELINK_P (baselink))
-  fns = BASELINK_FUNCTIONS (baselink);
-if (!template_id_p && !really_overloaded_fn (fns)
-	&& !mark_used (OVL_FIRST (fns), complain) && !(complain & tf_error))
+  bool qualified = BASELINK_QUALIFIED_P (baselink);
+
+  tree qualifying_scope = BINFO_TYPE (BASELINK_ACCESS_BINFO (baselink));
+  qualifying_scope = tsubst (qualifying_scope, args, complain, in_decl);
+
+  tree optype = BASELINK_OPTYPE (baselink);
+  optype = tsubst (optype, args, complain, in_decl);
+
+  tree template_args = NULL_TREE;
+  bool template_id_p = false;
+  tree fns = BASELINK_FUNCTIONS (baselink);
+  if (TREE_CODE (fns) == TEMPLATE_ID_EXPR)
+{
+  template_id_p = true;
+  template_args = TREE_OPERAND (fns, 1);
+  fns = TREE_OPERAND (fns, 0);
+  if (template_args)
+	template_args = tsubst_template_args (template_args, args,
+	  complain, in_decl);
+}
+
+  tree name = OVL_NAME (fns);
+  if (IDENTIFIER_TYPENAME_P (name))
+name = mangle_conv_op_name_for_type (optype);
+
+  baselink = lookup_fnfields (qualifying_scope, name, /*protect=*/1);
+  if (!baselink)
+{
+  if ((complain & tf_error) && constructor_name_p (name, qualifying_scope))
+	error ("cannot call constructor %<%T::%D%> directly",
+	   qualifying_scope, name);
   return error_mark_node;
+}
+
+  /* If lookup found a single function, mark it as used at this point.
+ (If it lookup found multiple functions the one selected later by
+ overload resolution will be marked as used at that point.)  */
+  if (BASELINK_P (baselink))
+fns = BASELINK_FUNCTIONS (baselink);
+  if (!template_id_p && !really_overloaded_fn (fns)
+  && !mark_used (OVL_FIRST (fns), complain) && !(complain & tf_error))
+return error_mark_node;
+
+  if (BASELINK_P (baselink))
+{
+  /* Add back the template arguments, if present.  */
+  if (template_id_p)
+	BASELINK_FUNCTIONS (baselink)
+	  = build2 (TEMPLATE_ID_EXPR, unknown_type_node,
+		

Re: [PATCH 1/7] [ARC] Add support for naked functions.

2017-06-16 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-06-01 15:34:51 
+0200]:

> gcc/
> 2016-12-13  Claudiu Zissulescu  
>   Andrew Burgess  
> 
>   * config/arc/arc-protos.h (arc_compute_function_type): Change prototype.
>   (arc_return_address_register): New function.
>   * config/arc/arc.c (arc_handle_fndecl_attribute): New function.
>   (arc_handle_fndecl_attribute): Add naked attribute.
>   (TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS): Define.
>   (TARGET_WARN_FUNC_RETURN): Likewise.
>   (arc_allocate_stack_slots_for_args): New function.
>   (arc_warn_func_return): Likewise.
>   (machine_function): Change type fn_type.
>   (arc_compute_function_type): Consider new naked function type,
>   change function return type.
>   (arc_must_save_register): Adapt to handle new
>   arc_compute_function_type's return type.
>   (arc_expand_prologue): Likewise.
>   (arc_expand_epilogue): Likewise.
>   (arc_return_address_regs): Delete.
>   (arc_return_address_register): New function.
>   (arc_epilogue_uses): Use above function.
>   * config/arc/arc.h (arc_return_address_regs): Delete prototype.
>   (arc_function_type): Change encoding, add naked type.
>   (ARC_INTERRUPT_P): Change to handle the new encoding.
>   (ARC_FAST_INTERRUPT_P): Likewise.
>   (ARC_NORMAL_P): Define.
>   (ARC_NAKED_P): Likewise.
>   (arc_compute_function_type): Delete prototype.
>   * config/arc/arc.md (in_ret_delay_slot): Use
>   arc_return_address_register function.
>   (simple_return): Likewise.
>   (p_return_i): Likewise.
> 
> gcc/testsuite
> 2016-12-13  Claudiu Zissulescu  
>   Andrew Burgess  
> 
>   * gcc.target/arc/naked-1.c: New file.
>   * gcc.target/arc/naked-2.c: Likewise.

Claudiu,

Sorry it's taken me a while to look at these patches.

I tried to apply this to the current GCC head, and it looks to me
like this doesn't apply.  Specifically `arc_expand_epilogue` does not
appear (in the current head) to have the code expected in this patch.

I have double checked at my end, but could you confirm that the patch
does apply cleanly for you please, then I'll spend some additional
time trying to figure out what I've done wrong :)

Thanks,
Andrew





> ---
>  gcc/config/arc/arc-protos.h|   6 +-
>  gcc/config/arc/arc.c   | 165 
> -
>  gcc/config/arc/arc.h   |  40 +---
>  gcc/config/arc/arc.md  |  10 +-
>  gcc/testsuite/gcc.target/arc/naked-1.c |  18 
>  gcc/testsuite/gcc.target/arc/naked-2.c |  26 ++
>  6 files changed, 197 insertions(+), 68 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/naked-1.c
>  create mode 100644 gcc/testsuite/gcc.target/arc/naked-2.c
> 
> diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
> index 4ff8e9b..b436dbe 100644
> --- a/gcc/config/arc/arc-protos.h
> +++ b/gcc/config/arc/arc-protos.h
> @@ -45,12 +45,10 @@ extern void arc_expand_atomic_op (enum rtx_code, rtx, 
> rtx, rtx, rtx, rtx);
>  extern void arc_split_compare_and_swap (rtx *);
>  extern void arc_expand_compare_and_swap (rtx *);
>  extern bool compact_memory_operand_p (rtx, machine_mode, bool, bool);
> +extern int arc_return_address_register (unsigned int);
> +extern unsigned int arc_compute_function_type (struct function *);
>  #endif /* RTX_CODE */
>  
> -#ifdef TREE_CODE
> -extern enum arc_function_type arc_compute_function_type (struct function *);
> -#endif /* TREE_CODE */
> -
>  extern bool arc_ccfsm_branch_deleted_p (void);
>  extern void arc_ccfsm_record_branch_deleted (void);
>  
> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
> index a65fc3a..7dfc68e 100644
> --- a/gcc/config/arc/arc.c
> +++ b/gcc/config/arc/arc.c
> @@ -211,6 +211,7 @@ static int rgf_banked_register_count;
>  static int get_arc_condition_code (rtx);
>  
>  static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
> +static tree arc_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
>  
>  /* Initialized arc_attribute_table to NULL since arc doesnot have any
> machine specific supported attributes.  */
> @@ -229,6 +230,9 @@ const struct attribute_spec arc_attribute_table[] =
>/* And these functions are always known to reside within the 21 bit
>   addressing range of blcc.  */
>{ "short_call",   0, 0, false, true,  true,  NULL, false },
> +  /* Function which are not having the prologue and epilogue generated
> + by the compiler.  */
> +  { "naked", 0, 0, true, false, false, arc_handle_fndecl_attribute, false },
>{ NULL, 0, 0, false, false, false, NULL, false }
>  };
>  static int arc_comp_type_attributes (const_tree, const_tree);
> @@ -513,6 +517,12 @@ static void arc_finalize_pic (void);
>  #define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P 

Re: [PATCH, rev 2] PR target/79799, Add vec_insert of V4SFmode on PowerPC ISA 3.0 (power9)

2017-06-16 Thread Michael Meissner
On Fri, Jun 16, 2017 at 02:52:46PM -0500, Segher Boessenkool wrote:
> Hi Mike,
> 
> On Thu, Jun 15, 2017 at 10:10:28PM -0400, Michael Meissner wrote:
> > +(define_insn_and_split "vsx_set_v4sf_p9"
> > +  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
> > +   (unspec:V4SF
> > +[(match_operand:V4SF 1 "gpc_reg_operand" "0")
> > + (match_operand:SF 2 "gpc_reg_operand" "ww")
> > + (match_operand:QI 3 "const_0_to_3_operand" "n")]
> > +UNSPEC_VSX_SET))
> > +   (clobber (match_scratch:SI 4 "="))]
> > +  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR"
> > +  "#"
> > +  "&& reload_completed"
> 
> I still don't think it is such a good idea to do all of this not until
> after reload.  It does of course allow you to play tricks with changing
> register mode at will, like you do ;-)

The problem is MODES_TIEABLE_P.  V4S{I,F}mode and SImode cannot be tied
together (i.e. use gen_lowpart to change the mode and use a SUBREG).  So after
reload, we can just use gen_rtx_REG (...) to change the register type, but
before reload, by creating the SUBREG, it can lead to various aborts if rtl
checking is turned on.

> All these unspecs are a similar problem: the RTL optimisers cannot do
> much at all with it.

I don't think there is a good way to represent a vec_insert.  And vec_extract
can't represent a variable extract either.

> > +  [(set_attr "type" "vecperm")

I generally use the type of the last insn.  I am open to other suggestions.

> Is that a good type for this?  I think the convert is more expensive
> than the permutes?  If so, that would be better (of course it only
> matters for sched1, not super important).
> 
> > --- gcc/testsuite/gcc.target/powerpc/pr79799-1.c(nonexistent)
> > +++ gcc/testsuite/gcc.target/powerpc/pr79799-1.c(working copy)
> > @@ -0,0 +1,43 @@
> > +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
> 
> Why not powerpc*-*-*?

Well as it turns out, it aborts in 32-bit, because -mvsx-small-integer is not
enabled, and we can't have SImode in vector registers.  I'll have to add some
additional tests and resubmit the patch.

> 
> > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
> > "-mcpu=power9" } } */
> > +/* { dg-require-effective-target powerpc_p9vector_ok } */
> > +/* { dg-options "-mcpu=power9 -O2" } */
> > +
> > +#include 
> > +
> > +/* GCC 7.1 did not have a specialized method for inserting 32-bit floating 
> > point on
> > +   ISA 3.0 (power9) systems.  */
> 
> That first line is a bit long.

Ok.

> The patch is okay for trunk and 7 with the testsuite nits taken care of.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] handle bzero/bcopy in DSE and aliasing (PR 80933, 80934)

2017-06-16 Thread Martin Sebor

+static bool
+gimple_fold_builtin_bcmp (gimple_stmt_iterator *gsi)
+{

+  /* Transform bcmp (a, b, len) into memcmp (a, b, len).  */
+
+  gimple *stmt = gsi_stmt (*gsi);
+  tree a = gimple_call_arg (stmt, 0);
+  tree b = gimple_call_arg (stmt, 1);
+  tree len = gimple_call_arg (stmt, 2);
+
+  gimple_seq seq = NULL;
+  gimple *repl = gimple_build_call (fn, 3, a, b, len);
+  gimple_seq_add_stmt_without_update (&seq, repl);
+  gsi_replace_with_seq_vops (gsi, seq);

given they have the same prototype you can do like gimple_fold_builtin_stpcpy:

  gimple_call_set_fndecl (stmt, fn);
  fold_stmt (gsi);

That works even with bcopy -> memmove if you swap arguments.

OK with those changes.


I made these changes along with those mentioned downthread and
committed r249278.

Martin


Improve force_edge_cold

2017-06-16 Thread Jan Hubicka
Hi,
force_edge_cold can be more aggressive about propagating zero counts.

Honza

Bootstrapped/regtested x86_64-linux, will commit it later today.

* predict.c (force_edge_cold): Handle declaring edges impossible
more aggressively.
Index: predict.c
===
--- predict.c   (revision 249244)
+++ predict.c   (working copy)
@@ -3968,6 +3968,7 @@ force_edge_cold (edge e, bool impossible
   profile_count old_count = e->count;
   int old_probability = e->probability;
   int prob_scale = REG_BR_PROB_BASE;
+  bool uninitialized_exit = false;
 
   /* If edge is already improbably or cold, just return.  */
   if (e->probability <= (impossible ? PROB_VERY_UNLIKELY : 0)
@@ -3978,6 +3979,8 @@ force_edge_cold (edge e, bool impossible
   {
if (e2->count.initialized_p ())
  count_sum += e2->count;
+   else
+ uninitialized_exit = true;
prob_sum += e2->probability;
   }
 
@@ -3989,7 +3992,7 @@ force_edge_cold (edge e, bool impossible
 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
   if (impossible)
e->count = profile_count::zero ();
-  if (old_probability)
+  else if (old_probability)
e->count = e->count.apply_scale (e->probability, old_probability);
   else
 e->count = e->count.apply_scale (1, REG_BR_PROB_BASE);
@@ -4016,6 +4019,34 @@ force_edge_cold (edge e, bool impossible
   else
 {
   e->probability = REG_BR_PROB_BASE;
+  if (e->src->count == profile_count::zero ())
+   return;
+  if (count_sum == profile_count::zero () && !uninitialized_exit
+ && impossible)
+   {
+ bool found = false;
+ for (gimple_stmt_iterator gsi = gsi_start_bb (e->src);
+  !gsi_end_p (gsi); gsi_next (&gsi))
+   {
+ if (stmt_can_terminate_bb_p (gsi_stmt (gsi)))
+   {
+ found = true;
+ break;
+   }
+   }
+ if (!found)
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file,
+"Making bb %i impossible and dropping count to 0.\n",
+e->src->index);
+ e->count = profile_count::zero ();
+ e->src->count = profile_count::zero ();
+ FOR_EACH_EDGE (e2, ei, e->src->preds)
+   force_edge_cold (e2, impossible);
+ return;
+   }
+   }
 
   /* If we did not adjusting, the source basic block has no likely edeges
 leaving other direction. In that case force that bb cold, too.


Fix profile update in unrolling

2017-06-16 Thread Jan Hubicka
Hi,
this patch makes the unroller update the profile slightly better by updating outgoing
probabilities when an exit is proved to be taken.

Bootstrapped/regtested x86_64-linux, will commit it later today.

Honza

* tree-ssa-loop-ivcanon.c (remove_exits_and_undefined_stmts): Update
profile.
(try_unroll_loop_completely): Fix reporting.
Index: tree-ssa-loop-ivcanon.c
===
--- tree-ssa-loop-ivcanon.c (revision 249244)
+++ tree-ssa-loop-ivcanon.c (working copy)
@@ -529,6 +529,8 @@ remove_exits_and_undefined_stmts (struct
}
  if (!loop_exit_edge_p (loop, exit_edge))
exit_edge = EDGE_SUCC (bb, 1);
+ exit_edge->probability = REG_BR_PROB_BASE;
+ exit_edge->count = exit_edge->src->count;
  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
  if (exit_edge->flags & EDGE_TRUE_VALUE)
@@ -853,8 +855,9 @@ try_unroll_loop_completely (struct loop
 loop->num);
  return false;
}
-  dump_printf_loc (report_flags, locus,
-   "loop turned into non-loop; it never loops.\n");
+  if (!n_unroll)
+dump_printf_loc (report_flags, locus,
+ "loop turned into non-loop; it never loops.\n");
 
   initialize_original_copy_tables ();
   auto_sbitmap wont_exit (n_unroll + 1);


Fix profile update in tail merging

2017-06-16 Thread Jan Hubicka
Hi,
profile updating in tail merging is completely wrong when profile feedback
is missing or 0.  This fixes it hopefully reasonably.
Bootstrapped/regtested x86_64-linux.

Honza

* tree-ssa-tail-merge.c (replace_block_by): Fix profile updating.
Index: tree-ssa-tail-merge.c
===
--- tree-ssa-tail-merge.c   (revision 249244)
+++ tree-ssa-tail-merge.c   (working copy)
@@ -1555,29 +1555,51 @@ replace_block_by (basic_block bb1, basic
   pred_edge, UNKNOWN_LOCATION);
 }
 
-  bb2->frequency += bb1->frequency;
-  if (bb2->frequency > BB_FREQ_MAX)
-bb2->frequency = BB_FREQ_MAX;
-
   bb2->count += bb1->count;
 
   /* Merge the outgoing edge counts from bb1 onto bb2.  */
   profile_count out_sum = profile_count::zero ();
+  int out_freq_sum = 0;
+
+  /* Recompute the edge probabilities from the new merged edge count.
+ Use the sum of the new merged edge counts computed above instead
+ of bb2's merged count, in case there are profile count insanities
+ making the bb count inconsistent with the edge weights.  */
+  FOR_EACH_EDGE (e1, ei, bb1->succs)
+{
+  if (e1->count.initialized_p ())
+   out_sum += e1->count;
+  out_freq_sum += EDGE_FREQUENCY (e1);
+}
+  FOR_EACH_EDGE (e1, ei, bb2->succs)
+{
+  if (e1->count.initialized_p ())
+   out_sum += e1->count;
+  out_freq_sum += EDGE_FREQUENCY (e1);
+}
+
   FOR_EACH_EDGE (e1, ei, bb1->succs)
 {
   e2 = find_edge (bb2, e1->dest);
   gcc_assert (e2);
   e2->count += e1->count;
+  if (out_sum > 0 && e2->count.initialized_p ())
+   {
+ e2->probability = e2->count.probability_in (bb2->count);
+   }
+  else if (bb1->frequency && bb2->frequency)
+   e2->probability = e1->probability;
+  else if (bb2->frequency && !bb1->frequency)
+   ;
+  else if (out_freq_sum)
+   e2->probability = GCOV_COMPUTE_SCALE (EDGE_FREQUENCY (e1)
+ + EDGE_FREQUENCY (e2),
+ out_freq_sum);
   out_sum += e2->count;
 }
-  /* Recompute the edge probabilities from the new merged edge count.
- Use the sum of the new merged edge counts computed above instead
- of bb2's merged count, in case there are profile count insanities
- making the bb count inconsistent with the edge weights.  */
-  FOR_EACH_EDGE (e2, ei, bb2->succs)
-{
-  e2->probability = e2->count.probability_in (out_sum);
-}
+  bb2->frequency += bb1->frequency;
+  if (bb2->frequency > BB_FREQ_MAX)
+bb2->frequency = BB_FREQ_MAX;
 
   /* Move over any user labels from bb1 after the bb2 labels.  */
   gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);


Re: [PATCH 2/7] [ARC] Define ADDITIONAL_REGISTER_NAMES.

2017-06-16 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-06-01 15:34:52 
+0200]:

> This macro is needed to be used with -ffixed- option, and inline asm.
> 
> gcc/
> 2017-01-09  Claudiu Zissulescu  
> 
>   * config/arc/arc.h (ADDITIONAL_REGISTER_NAMES): Define.

This looks fine and could be applied straight away I think.

Thanks,
Andrew

> ---
>  gcc/config/arc/arc.h | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
> index 16d5319..585e98c 100644
> --- a/gcc/config/arc/arc.h
> +++ b/gcc/config/arc/arc.h
> @@ -1262,6 +1262,13 @@ extern char rname56[], rname57[], rname58[], rname59[];
>"lp_start", "lp_end" \
>  }
>  
> +#define ADDITIONAL_REGISTER_NAMES\
> +{\
> +  {"ilink",  29},\
> +  {"r29",29},\
> +  {"r30",30} \
> +}
> +
>  /* Entry to the insn conditionalizer.  */
>  #define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
>arc_final_prescan_insn (INSN, OPVEC, NOPERANDS)
> -- 
> 1.9.1
> 


Re: [PATCH 3/7] [ARC] [LRA] Fix tests asm constraints.

2017-06-16 Thread Andrew Burgess
* Claudiu Zissulescu  [2017-06-01 15:34:53 
+0200]:

> LRA doesn't like the 'X' constraint as used in our tests, remove it.
> 
> gcc/testsuite
> 2017-01-09  Claudiu Zissulescu  
> 
>   * gcc.target/arc/mulsi3_highpart-1.c: Remove 'X' constraint.
>   * gcc.target/arc/mulsi3_highpart-2.c: Likewise.

This looks fine and could be applied straight away I think.

Thanks,
Andrew



> ---
>  gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c | 2 +-
>  gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c 
> b/gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c
> index 57cb95b..5fd6c36 100644
> --- a/gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c
> +++ b/gcc/testsuite/gcc.target/arc/mulsi3_highpart-1.c
> @@ -7,7 +7,7 @@
>  static int
>  id (int i)
>  {
> -  asm ("": "+Xr" (i));
> +  asm ("": "+r" (i));
>return i;
>  }
>  
> diff --git a/gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c 
> b/gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c
> index 287d96d..6ec4bc5 100644
> --- a/gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c
> +++ b/gcc/testsuite/gcc.target/arc/mulsi3_highpart-2.c
> @@ -9,7 +9,7 @@
>  static int
>  id (int i)
>  {
> -  asm ("": "+Xr" (i));
> +  asm ("": "+r" (i));
>return i;
>  }
>  
> -- 
> 1.9.1
> 


Re: [testsuite] Get rid of dg-skip-if etc. default args

2017-06-16 Thread Mike Stump
On Jun 15, 2017, at 3:49 PM, Rainer Orth  wrote:
> 
> It has long bothered me that many calls to dg-skip-if and its companions
> dg-xfail-if and dg-xfail-run-if in the testsuite are cluttered with the
> unnecessary default arguments ({ "*" } { "" } with and without braces).

> Now, this patch removes them wholesale.

Sounds good to me.



Re: [PATCH, rev 2] PR target/79799, Add vec_insert of V4SFmode on PowerPC ISA 3.0 (power9)

2017-06-16 Thread Segher Boessenkool
Hi Mike,

On Thu, Jun 15, 2017 at 10:10:28PM -0400, Michael Meissner wrote:
> +(define_insn_and_split "vsx_set_v4sf_p9"
> +  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
> + (unspec:V4SF
> +  [(match_operand:V4SF 1 "gpc_reg_operand" "0")
> +   (match_operand:SF 2 "gpc_reg_operand" "ww")
> +   (match_operand:QI 3 "const_0_to_3_operand" "n")]
> +  UNSPEC_VSX_SET))
> +   (clobber (match_scratch:SI 4 "="))]
> +  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR"
> +  "#"
> +  "&& reload_completed"

I still don't think it is such a good idea to do all of this not until
after reload.  It does of course allow you to play tricks with changing
register mode at will, like you do ;-)

All these unspecs are a similar problem: the RTL optimisers cannot do
much at all with it.

> +  [(set_attr "type" "vecperm")

Is that a good type for this?  I think the convert is more expensive
than the permutes?  If so, that would be better (of course it only
matters for sched1, not super important).

> --- gcc/testsuite/gcc.target/powerpc/pr79799-1.c  (nonexistent)
> +++ gcc/testsuite/gcc.target/powerpc/pr79799-1.c  (working copy)
> @@ -0,0 +1,43 @@
> +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */

Why not powerpc*-*-*?

> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
> "-mcpu=power9" } } */
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-mcpu=power9 -O2" } */
> +
> +#include 
> +
> +/* GCC 7.1 did not have a specialized method for inserting 32-bit floating 
> point on
> +   ISA 3.0 (power9) systems.  */

That first line is a bit long.


The patch is okay for trunk and 7 with the testsuite nits taken care of.

Thanks,


Segher


C++ PATCH for c++/80639, ICE with invalid PMF initialization

2017-06-16 Thread Jason Merrill
In this testcase, instantiate_type resolves the overload, but the
result is a pointer to the wrong class; we need to then try to convert
it to the desired type in order to get the diagnostic we want.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit 9248f1e42712d3edfc5447e6df371b1458bc4f5e
Author: Jason Merrill 
Date:   Thu Jun 15 18:17:19 2017 -0400

PR c++/80639 - ICE with invalid PMF initialization.

PR c++/80043 - ICE with -fpermissive
* typeck.c (convert_for_assignment): Recurse when instantiate_type
returns without an error.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 05b4fbb..0f22e64 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -8590,9 +8590,10 @@ convert_for_assignment (tree type, tree rhs,
  if (rhstype == unknown_type_node)
{
  tree r = instantiate_type (type, rhs, tf_warning_or_error);
- /* -fpermissive might allow this.  */
+ /* -fpermissive might allow this; recurse.  */
  if (!seen_error ())
-   return r;
+   return convert_for_assignment (type, r, errtype, fndecl,
+  parmnum, complain, flags);
}
  else if (fndecl)
error ("cannot convert %qH to %qI for argument %qP to %qD",
diff --git a/gcc/testsuite/g++.dg/template/ptrmem31.C 
b/gcc/testsuite/g++.dg/template/ptrmem31.C
new file mode 100644
index 000..5c66b72
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/ptrmem31.C
@@ -0,0 +1,23 @@
+// PR c++/80639
+// { dg-do compile { target c++14 } }
+
+template < typename > struct A;
+
+struct B
+{ 
+  template < int > void m ();
+  template < int > struct K { static void n (); };
+  void p () { K < 0 >::n (); }
+};
+
+template <> struct A < B >
+{ 
+  using T = void (A::*)();
+  template < int u > static constexpr T h = ::m < u >; // { dg-error "cannot 
convert" }
+};
+
+template < int v > void B::K < v >::n ()
+{ 
+  using S = A < B >;
+  S::h < 0 >;
+}


C++ PATCH for c++/80465, ICE with generic lambda and noexcept

2017-06-16 Thread Jason Merrill
We need processing_template_decl to be set when we check
TYPE_NOTHROW_P; let's leave it set across a larger section of the
function.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit 91eb4dc35508aec3ea6d9939e49f691ac6c47231
Author: Jason Merrill 
Date:   Fri Jun 16 14:49:21 2017 -0400

PR c++/80465 - ICE with generic lambda with noexcept-specifier.

* lambda.c (maybe_add_lambda_conv_op): Keep processing_template_decl
set longer for a generic lambda.

diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index ee8784c..41d4921 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -987,6 +987,8 @@ maybe_add_lambda_conv_op (tree type)
null_pointer_node);
   if (generic_lambda_p)
 {
+  ++processing_template_decl;
+
   /* Prepare the dependent member call for the static member function
 '_FUN' and, potentially, prepare another call to be used in a decltype
 return expression for a deduced return call op to allow for simple
@@ -1036,9 +1038,7 @@ maybe_add_lambda_conv_op (tree type)
 
if (generic_lambda_p)
  {
-   ++processing_template_decl;
tree a = forward_parm (tgt);
-   --processing_template_decl;
 
CALL_EXPR_ARG (call, ix) = a;
if (decltype_call)
@@ -1062,11 +1062,9 @@ maybe_add_lambda_conv_op (tree type)
 {
   if (decltype_call)
{
- ++processing_template_decl;
  fn_result = finish_decltype_type
(decltype_call, /*id_expression_or_member_access_p=*/false,
 tf_warning_or_error);
- --processing_template_decl;
}
 }
   else
@@ -1084,6 +1082,9 @@ maybe_add_lambda_conv_op (tree type)
   && TYPE_NOTHROW_P (TREE_TYPE (callop)))
 stattype = build_exception_variant (stattype, noexcept_true_spec);
 
+  if (generic_lambda_p)
+--processing_template_decl;
+
   /* First build up the conversion op.  */
 
   tree rettype = build_pointer_type (stattype);
diff --git a/gcc/testsuite/g++.dg/cpp1z/noexcept-type17.C 
b/gcc/testsuite/g++.dg/cpp1z/noexcept-type17.C
new file mode 100644
index 000..46aefdd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/noexcept-type17.C
@@ -0,0 +1,7 @@
+// PR c++/80465
+// { dg-options -std=c++1z }
+
+int foo(...);
+int main() {
+  [](auto a) noexcept(noexcept(foo(a))){}(42);
+}


Re: [PATCH 00/30] [ARM] Reworking the -mcpu, -march and -mfpu options

2017-06-16 Thread Ramana Radhakrishnan
On Fri, Jun 16, 2017 at 10:11 PM, Richard Earnshaw
 wrote:
> On 13/06/17 18:35, Richard Earnshaw (lists) wrote:
>> On 09/06/17 13:53, Richard Earnshaw wrote:
>>>
>>> During the ARM BoF at the Cauldron last year I mentioned that I wanted
>>> to rework the way GCC on ARM handles the command line options.  The
>>> problem was that most users, and even many experts, can't remember
>>> which FPU/SIMD unit comes with which CPU and that consequently many
>>> users were inadvertently generating sub-optimal code for their system.
>>>
>>> This patch series implements the proposed change and provides support
>>> for a generic way of adding optional features to architectures and CPU
>>> names.  The documentation patches at the end of the series explain the
>>> new syntax, so I won't repeat all that here.  Suffice to say here that
>>> the result is that the -mfpu option now defaults to 'auto', which
>>> allows the compiler to infer the floating-point and simd options from
>>> the CPU/architecture options and that these options can normally be
>>> expressed in a context-specific manner like +simd or +fp without
>>> having to know precisely which variant is implemented.  Long term I'd
>>> like to deprecate -mfpu and entirely move over to the new syntax; but
>>> it's too early to start that process now.
>>>
>>> All the patches in the series should build a working basic compiler,
>>> but the multilib selection will not work correctly until the relevant
>>> patches towards the end are applied.  It is not really feasible to
>>> retain that functionality without collapsing too many of the patches
>>> together into one hunk.  It's also possible that some tests in the
>>> testsuite may exhibit transient misbehaviour, but there should be no
>>> regressions by the end of the sequence (some tests no-longer run in
>>> the default configurations because the default CPU does not have
>>> floating-point support).
>>>
>>> Just two patches are to the generic code, but both are fairly trivial.
>>> One permits the sbitmap code to be used in the driver programs and the
>>> other provides a way of escaping the meta-character in some multilib
>>> reuse strings.
>>>
>>> I won't apply any of this series until those two patches have been
>>> approved, and I won't commit anything before the middle of next week
>>> even then.  This is a fairly complex change and it deserves some time
>>> for people to comment before committing.
>>>
>>
>> The attached is a roll-up of the entire series after the updates I've
>> posted today.  This should eliminate any potential issues with applying
>> the series for the purposes of testing.  Please don't post issues in
>> reply to this, but to the individual patches in the series.
>>
>> R.
>>
>
> This series has now been committed after rebasing on to the latest
> trunk.  Nothing material has changed so I'm not reposting the patches again.
>

Yay !

This is definitely a user visible change and it would be great to put
this into the release notes for GCC 8.

Ramana

> "Let the mayhem begin..."
>
> R.


RFC: C++ PATCH for c++/80831, ICE with -fsyntax-only

2017-06-16 Thread Jason Merrill
In this testcase, our c_parse_final_cleanups processing hits a
function that never got a cgraph node due to -fsyntax-only.  It seems
harmless to create one at this point; honza, does that make sense to
you?

Tested x86_64-pc-linux-gnu.
commit ccb40be7f9756533aeb658f092edd17ad0c7814e
Author: Jason Merrill 
Date:   Thu Jun 15 22:34:00 2017 -0400

PR c++/80831 - ICE with -fsyntax-only.

* decl2.c (c_parse_final_cleanups): Use cgraph_node::get_create.

diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c
index ab32b71..3863736 100644
--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -4646,6 +4646,8 @@ c_parse_final_cleanups (void)
  if (!DECL_SAVED_TREE (decl))
continue;
 
+ cgraph_node *node = cgraph_node::get_create (decl);
+
  /* We lie to the back end, pretending that some functions
 are not defined when they really are.  This keeps these
 functions from being put out unnecessarily.  But, we must
@@ -4666,9 +4668,6 @@ c_parse_final_cleanups (void)
  && DECL_INITIAL (decl)
  && decl_needed_p (decl))
{
- struct cgraph_node *node, *next;
-
- node = cgraph_node::get (decl);
  if (node->cpp_implicit_alias)
node = node->get_alias_target ();
 
@@ -4678,7 +4677,8 @@ c_parse_final_cleanups (void)
 group, we need to mark all symbols in the same comdat group
 that way.  */
  if (node->same_comdat_group)
-   for (next = dyn_cast (node->same_comdat_group);
+   for (cgraph_node *next
+  = dyn_cast (node->same_comdat_group);
 next != node;
 next = dyn_cast (next->same_comdat_group))
  next->call_for_symbol_thunks_and_aliases (clear_decl_external,
@@ -4692,7 +4692,7 @@ c_parse_final_cleanups (void)
  if (!DECL_EXTERNAL (decl)
  && decl_needed_p (decl)
  && !TREE_ASM_WRITTEN (decl)
- && !cgraph_node::get (decl)->definition)
+ && !node->definition)
{
  /* We will output the function; no longer consider it in this
 loop.  */
diff --git a/gcc/testsuite/g++.dg/other/fsyntax-only1.C 
b/gcc/testsuite/g++.dg/other/fsyntax-only1.C
new file mode 100644
index 000..19adb7e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/other/fsyntax-only1.C
@@ -0,0 +1,45 @@
+// PR c++/80831
+// { dg-options -fsyntax-only }
+// { dg-do compile { target c++11 } }
+
+class A
+{
+public:
+virtual ~A() { }
+};
+
+class B { };
+
+class C : public A { };
+
+template
+class D : public C
+{
+public:
+D() { }
+~D() { }
+};
+
+class E
+{
+public:
+static E& p();
+B q();
+template
+B q(void (J::*r)())
+{
+new D();
+return q();
+}
+};
+
+void t()
+{
+  class F
+  {
+  public:
+virtual void s() { }
+  };
+  E& x = E::p();
+  B y = x.q(::s);
+}


C++ PATCH for c++/80174, ICE with partial specialization of member template

2017-06-16 Thread Jason Merrill
My patch for 71747 missed this case; we only want to consider the
innermost args in the call to coerce_template_parms, just like in
unification and comparison.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit 33e24de5519e868a915ccee6fcfee362870930f6
Author: Jason Merrill 
Date:   Thu Jun 15 18:00:47 2017 -0400

PR c++/80174 - ICE with partial specialization of member template.

PR c++/71747
* pt.c (get_partial_spec_bindings): Only coerce innermost args.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 8a61b74..b055507 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -21676,9 +21676,11 @@ get_partial_spec_bindings (tree tmpl, tree spec_tmpl, 
tree args)
  `T' is `A' but unify () does not check whether `typename T::X'
  is `int'.  */
   spec_args = tsubst (spec_args, deduced_args, tf_none, NULL_TREE);
-  spec_args = coerce_template_parms (DECL_INNERMOST_TEMPLATE_PARMS (tmpl),
-spec_args, tmpl,
-tf_none, false, false);
+
+  if (spec_args != error_mark_node)
+spec_args = coerce_template_parms (DECL_INNERMOST_TEMPLATE_PARMS (tmpl),
+  INNERMOST_TEMPLATE_ARGS (spec_args),
+  tmpl, tf_none, false, false);
 
   pop_tinst_level ();
 
diff --git a/gcc/testsuite/g++.dg/template/partial-specialization6.C 
b/gcc/testsuite/g++.dg/template/partial-specialization6.C
new file mode 100644
index 000..51a1590
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/partial-specialization6.C
@@ -0,0 +1,28 @@
+// PR c++/80174
+
+typedef unsigned char uint8_t; 
+
+template 
+struct HighestMaxFieldIdx {
+  static const uint8_t maxFieldIdx = T::fieldIdx;
+};
+
+template 
+struct Outer {
+
+  template 
+  struct Varint {};
+
+
+  template 
+  struct Varint<_fieldIdx, uint8_t, field> {
+static const uint8_t fieldIdx = _fieldIdx;
+  };
+};
+
+struct Msg {
+  uint8_t a;
+
+  static const uint8_t t
+  = HighestMaxFieldIdx >::maxFieldIdx;
+};


[PATCH 1/2] i386: Consider Kaby Lake to be equivalent to Skylake

2017-06-16 Thread Matt Turner
Currently -march=native selects -march=broadwell on Kaby Lake systems,
since its model numbers are missing from the switch statement. It falls
back to the default case and chooses -march=broadwell because of the
presence of the ADX instruction set.

gcc/
* config/i386/driver-i386.c (host_detect_local_cpu): Add Kaby
Lake models to skylake case.

gcc/testsuite/

* gcc.target/i386/builtin_target.c: Add Kaby Lake models to
skylake check.

libgcc/

* config/i386/cpuinfo.c (get_intel_cpu): Add Kaby Lake models to
skylake case.
---
 gcc/config/i386/driver-i386.c  | 3 +++
 gcc/testsuite/gcc.target/i386/builtin_target.c | 3 +++
 libgcc/config/i386/cpuinfo.c   | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 6c812514239..09faad0af0e 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -781,6 +781,9 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
case 0x4e:
case 0x5e:
  /* Skylake.  */
+   case 0x8e:
+   case 0x9e:
+ /* Kaby Lake.  */
  cpu = "skylake";
  break;
case 0x57:
diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c 
b/gcc/testsuite/gcc.target/i386/builtin_target.c
index 374f0292453..9c190eb7ebc 100644
--- a/gcc/testsuite/gcc.target/i386/builtin_target.c
+++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
@@ -88,6 +88,9 @@ check_intel_cpu_model (unsigned int family, unsigned int 
model,
case 0x4e:
case 0x5e:
  /* Skylake.  */
+   case 0x8e:
+   case 0x9e:
+ /* Kaby Lake.  */
  assert (__builtin_cpu_is ("corei7"));
  assert (__builtin_cpu_is ("skylake"));
  break;
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index a1dc011525f..b008fb6e396 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -183,6 +183,9 @@ get_intel_cpu (unsigned int family, unsigned int model, 
unsigned int brand_id)
case 0x4e:
case 0x5e:
  /* Skylake.  */
+   case 0x8e:
+   case 0x9e:
+ /* Kaby Lake.  */
  __cpu_model.__cpu_type = INTEL_COREI7;
  __cpu_model.__cpu_subtype = INTEL_COREI7_SKYLAKE;
  break;
-- 
2.13.0



[PATCH 2/2] i386: Assume Skylake for unknown models with clflushopt

2017-06-16 Thread Matt Turner
gcc/
* config/i386/driver-i386.c (host_detect_local_cpu): Assume
skylake for unknown models with clflushopt.
---
 gcc/config/i386/driver-i386.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 09faad0af0e..570c49031bd 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -797,6 +797,9 @@ const char *host_detect_local_cpu (int argc, const char 
**argv)
  /* Assume Knights Landing.  */
  if (has_avx512f)
cpu = "knl";
+ /* Assume Skylake.  */
+ else if (has_clflushopt)
+   cpu = "skylake";
  /* Assume Broadwell.  */
  else if (has_adx)
cpu = "broadwell";
-- 
2.13.0



Re: [PATCH, rev 2] PR target/79799, Add vec_insert of V4SFmode on PowerPC ISA 3.0 (power9)

2017-06-16 Thread Michael Meissner
On Fri, Jun 16, 2017 at 04:30:48PM -0500, Segher Boessenkool wrote:
> On Fri, Jun 16, 2017 at 04:26:58PM -0400, Michael Meissner wrote:
> > > > +  "&& reload_completed"
> > > 
> > > I still don't think it is such a good idea to do all of this not until
> > > after reload.  It does of course allow you to play tricks with changing
> > > register mode at will, like you do ;-)
> > 
> > The problem is MODES_TIEABLE_P.  V4S{I,F}mode and SImode cannot be tied
> > together (i.e. use gen_lowpart to change the mode and use a SUBREG).  So 
> > after
> > reload, we can just use gen_rtx_REG (...) to change the register type, but
> > before reload, by creating the SUBREG, it can lead to various aborts if rtl
> > checking is turned on.
> 
> That sounds like a problem elsewhere?  Hrm.
> 
> > > All these unspecs are a similar problem: the RTL optimisers cannot do
> > > much at all with it.
> > 
> > I don't think there is a good way to represent a vec_insert.  And 
> > vec_extract
> > can't represent a variable extract either.
> 
> Yeah.  But especially for all this lane shuffling etc. the generic
> optimisers could do a good job, if only they knew how.  Maybe we need
> some new RTL codes.
> 
> > > > +  [(set_attr "type" "vecperm")
> > 
> > > Is that a good type for this?  I think the convert is more expensive
> > > than the permutes?  If so, that would be better (of course it only
> > > matters for sched1, not super important).
> > 
> > I generally use the type of the last insn.  I am open to other suggestions.
> 
> It should describe the resulting insns as a whole.  Picking the type of
> the most expensive insn is often a reasonable approximation; for integer
> insns "two" or "three" can be okay.
> 
> I don't think we can do much better currently.

Here is the latest patch that restricts the optimization to 64-bit (due to
needing VSX small integers).  I've done a full bootstrap/make check on a little
endian power8 system, and a build without bootstrap and make check on a little
endian power9 system.  Neither the power8 nor the power9 systems had any
regressions.  I'm also running a test on a big endian power7 system for
completeness.

Assuming the power7 test finishes without any regressions, can I check this
patch into the trunk and later the GCC 7 branch?

The main change was to restrict the optimization to 64-bit PowerPC that have
VSX small integer support turned on (default for 64-bit).  I did shorten the
one line in the testsuite that you mentioned.

[gcc]
2017-06-16  Michael Meissner  

PR target/79799
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Add support
for doing vector set of SFmode on ISA 3.0.
* config/rs6000/vsx.md (vsx_set_v4sf_p9): Likewise.
(vsx_set_v4sf_p9_zero): Special case setting 0.0f to a V4SF
element.
(vsx_insert_extract_v4sf_p9): Add an optimization for inserting a
SFmode value into a V4SF variable that was extracted from another
V4SF variable without converting the element to double precision
and back to single precision vector format.
(vsx_insert_extract_v4sf_p9_2): Likewise.

[gcc/testsuite]
2017-06-16  Michael Meissner  

PR target/79799
* gcc.target/powerpc/pr79799-1.c: New test.
* gcc.target/powerpc/pr79799-2.c: Likewise.
* gcc.target/powerpc/pr79799-3.c: Likewise.
* gcc.target/powerpc/pr79799-4.c: Likewise.
* gcc.target/powerpc/pr79799-5.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 249175)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -7451,6 +7451,8 @@ rs6000_expand_vector_set (rtx target, rt
insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
  else if (mode == V16QImode)
insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
+ else if (mode == V4SFmode)
+   insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
}
 
   if (insn)
Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md(revision 249175)
+++ gcc/config/rs6000/vsx.md(working copy)
@@ -3012,6 +3012,134 @@ (define_insn "vsx_set__p9"
 }
   [(set_attr "type" "vecperm")])
 
+(define_insn_and_split "vsx_set_v4sf_p9"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+   (unspec:V4SF
+[(match_operand:V4SF 1 "gpc_reg_operand" "0")
+ (match_operand:SF 2 "gpc_reg_operand" "ww")
+ (match_operand:QI 3 "const_0_to_3_operand" "n")]
+UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 4 "="))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && 

C++ PATCH for c++/81102, wrong error with partial specialization

2017-06-16 Thread Jason Merrill
Two issues here: one, we were failing to look through references when
comparing types, so we decided that a function type in a template
non-type argument didn't match a reference-to-function type in the
type of the non-type parameter.

With that fixed, I also needed to move the C++17 non-type auto
handling down past that type comparison.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit ecd071290cad1fcfb33eee3889f94dd1e9374b24
Author: Jason Merrill 
Date:   Fri Jun 16 13:10:53 2017 -0400

PR c++/81102 - Wrong error with partial specialization.

* pt.c (unify) [TEMPLATE_PARM_INDEX]: Strip reference when comparing
types.  Do type deduction later.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b055507..8c6499f 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -20632,18 +20632,6 @@ unify (tree tparms, tree targs, tree parm, tree arg, 
int strict,
  return x;
}
 
-  if (cxx_dialect >= cxx1z
- /* We deduce from array bounds in try_array_deduction.  */
- && !(strict & UNIFY_ALLOW_INTEGER)
- && uses_template_parms (TREE_TYPE (parm))
- && !type_uses_auto (TREE_TYPE (parm)))
-   {
- tree atype = TREE_TYPE (arg);
- RECUR_AND_CHECK_FAILURE (tparms, targs,
-  TREE_TYPE (parm), atype,
-  UNIFY_ALLOW_NONE, explain_p);
-   }
-
   /* [temp.deduct.type] If, in the declaration of a function template
 with a non-type template-parameter, the non-type
 template-parameter is used in an expression in the function
@@ -20664,7 +20652,8 @@ unify (tree tparms, tree targs, tree parm, tree arg, 
int strict,
/* Template-parameter dependent expression.  Just accept it for now.
   It will later be processed in convert_template_argument.  */
;
-  else if (same_type_p (TREE_TYPE (arg), tparm))
+  else if (same_type_p (non_reference (TREE_TYPE (arg)),
+   non_reference (tparm)))
/* OK */;
   else if ((strict & UNIFY_ALLOW_INTEGER)
   && CP_INTEGRAL_TYPE_P (tparm))
@@ -20673,9 +20662,22 @@ unify (tree tparms, tree targs, tree parm, tree arg, 
int strict,
   corresponding parameter.  */
arg = fold (build_nop (tparm, arg));
   else if (uses_template_parms (tparm))
-   /* We haven't deduced the type of this parameter yet.  Try again
-  later.  */
-   return unify_success (explain_p);
+   {
+ /* We haven't deduced the type of this parameter yet.  */
+ if (cxx_dialect >= cxx1z
+ /* We deduce from array bounds in try_array_deduction.  */
+ && !(strict & UNIFY_ALLOW_INTEGER))
+   {
+ /* Deduce it from the non-type argument.  */
+ tree atype = TREE_TYPE (arg);
+ RECUR_AND_CHECK_FAILURE (tparms, targs,
+  tparm, atype,
+  UNIFY_ALLOW_NONE, explain_p);
+   }
+ else
+   /* Try again later.  */
+   return unify_success (explain_p);
+   }
   else
return unify_type_mismatch (explain_p, tparm, TREE_TYPE (arg));
 
diff --git a/gcc/testsuite/g++.dg/template/partial-specialization7.C 
b/gcc/testsuite/g++.dg/template/partial-specialization7.C
new file mode 100644
index 000..aa42191
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/partial-specialization7.C
@@ -0,0 +1,40 @@
+// PR c++/81102
+
+template 
+struct HelperWrapper;
+
+// [...]
+
+template 
+struct HelperWrapper
+{
+static inline int WrapFuncT(const int)
+{
+return 0; // Changed
+}
+};
+
+// Unary
+template 
+struct HelperWrapper
+{
+static inline int WrapFuncT(const int)
+{
+return 1; // Changed
+}
+};
+
+// Binary
+template 
+struct HelperWrapper
+{
+static inline int WrapFuncT(const int)
+{
+return 2; // Changed
+}
+};
+
+int main()
+{
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/template/partial5.C 
b/gcc/testsuite/g++.dg/template/partial5.C
index 1b56fb3..ee45a93 100644
--- a/gcc/testsuite/g++.dg/template/partial5.C
+++ b/gcc/testsuite/g++.dg/template/partial5.C
@@ -14,7 +14,7 @@ template
 struct Y { };
 
 template
-struct Y { }; // { dg-error "" }
+struct Y { }; // { dg-error "" "" { target { ! c++1z } } }
 
 
 template


C++ PATCH for c++/80614, wrong mangling for C++17 noexcept type

2017-06-16 Thread Jason Merrill
My earlier patch for noexcept types preserved the noexcept in
canonicalize_for_substitution, but lost it in write_type.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit 152635968e18ec06da5308102c4859523c897707
Author: Jason Merrill 
Date:   Fri Jun 16 14:23:54 2017 -0400

PR c++/80614 - Wrong mangling for C++17 noexcept type

* mangle.c (write_type): Put the eh spec back on the function type.

diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index e866675..c83aef5 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -2100,6 +2100,11 @@ write_type (tree type)
  || TREE_CODE (t) == METHOD_TYPE)
{
  t = build_ref_qualified_type (t, type_memfn_rqual (type));
+ if (flag_noexcept_type)
+   {
+ tree r = TYPE_RAISES_EXCEPTIONS (type);
+ t = build_exception_variant (t, r);
+   }
  if (abi_version_at_least (8)
  || type == TYPE_MAIN_VARIANT (type))
/* Avoid adding the unqualified function type as a substitution.  */
diff --git a/gcc/testsuite/g++.dg/cpp1z/noexcept-type16.C 
b/gcc/testsuite/g++.dg/cpp1z/noexcept-type16.C
new file mode 100644
index 000..8c763a5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/noexcept-type16.C
@@ -0,0 +1,11 @@
+// PR c++/80614
+// { dg-options -std=c++1z }
+
+template  void fn() {}
+
+int main() {
+  // { dg-final { scan-assembler "_Z2fnIKFvvEEvv" } }
+  fn();
+  // { dg-final { scan-assembler "_Z2fnIKDoFvvEEvv" } }
+  fn();
+}


[Neon intrinsics] Literal vector construction through vcombine is poor

2017-06-16 Thread Michael Collison
This patch improves code generation for literal vector construction by 
expanding and exposing the pattern to rtl optimization earlier. The current 
implementation delays splitting the pattern until after reload which results in 
poor code generation for the following code:


#include "arm_neon.h"

int16x8_t
foo ()
{
  return vcombine_s16 (vdup_n_s16 (0), vdup_n_s16 (8));
}

Trunk generates:

foo:
movi v1.2s, 0
movi v0.4h, 0x8
dup d2, v1.d[0]
ins v2.d[1], v0.d[0]
orr v0.16b, v2.16b, v2.16b
ret

With the patch we now generate:

foo:
movi v1.4h, 0x8
movi v0.4s, 0
ins v0.d[1], v1.d[0]
ret

Bootstrapped and tested on aarch64-linux-gnu. Okay for trunk?

2017-06-15  Michael Collison  

* config/aarch64/aarch64-simd.md(aarch64_combine_internal):
Convert from define_insn_and_split into define_expand
* config/aarch64/aarch64.c(aarch64_split_simd_combine):
Allow register and subreg operands.


pr7057.patch
Description: pr7057.patch


Re: [PATCH 00/30] [ARM] Reworking the -mcpu, -march and -mfpu options

2017-06-16 Thread Richard Earnshaw
On 13/06/17 18:35, Richard Earnshaw (lists) wrote:
> On 09/06/17 13:53, Richard Earnshaw wrote:
>>
>> During the ARM BoF at the Cauldron last year I mentioned that I wanted
>> to rework the way GCC on ARM handles the command line options.  The
>> problem was that most users, and even many experts, can't remember
>> which FPU/SIMD unit comes with which CPU and that consequently many
>> users were inadvertently generating sub-optimal code for their system.
>>
>> This patch series implements the proposed change and provides support
>> for a generic way of adding optional features to architectures and CPU
>> names.  The documentation patches at the end of the series explain the
>> new syntax, so I won't repeat all that here.  Suffice to say here that
>> the result is that the -mfpu option now defaults to 'auto', which
>> allows the compiler to infer the floating-point and simd options from
>> the CPU/architecture options and that these options can normally be
>> expressed in a context-specific manner like +simd or +fp without
>> having to know precisely which variant is implemented.  Long term I'd
>> like to deprecate -mfpu and entirely move over to the new syntax; but
>> it's too early to start that process now.
>>
>> All the patches in the series should build a working basic compiler,
>> but the multilib selection will not work correctly until the relevant
>> patches towards the end are applied.  It is not really feasible to
>> retain that functionality without collapsing too many of the patches
>> together into one hunk.  It's also possible that some tests in the
>> testsuite may exhibit transient misbehaviour, but there should be no
>> regressions by the end of the sequence (some tests no-longer run in
>> the default configurations because the default CPU does not have
>> floating-point support).
>>
>> Just two patches are to the generic code, but both are fairly trivial.
>> One permits the sbitmap code to be used in the driver programs and the
>> other provides a way of escaping the meta-character in some multilib
>> reuse strings.
>>
>> I won't apply any of this series until those two patches have been
>> approved, and I won't commit anything before the middle of next week
>> even then.  This is a fairly complex change and it deserves some time
>> for people to comment before committing.
>>
> 
> The attached is a roll-up of the entire series after the updates I've
> posted today.  This should eliminate any potential issues with applying
> the series for the purposes of testing.  Please don't post issues in
> reply to this, but to the individual patches in the series.
> 
> R.
> 

This series has now been committed after rebasing on to the latest
trunk.  Nothing material has changed so I'm not reposting the patches again.

"Let the mayhem begin..."

R.


Re: [PATCH v4, rs6000] gcc mainline, add builtin support for vec_float, vec_float2, vec_floate, vec_floate, builtins

2017-06-16 Thread Segher Boessenkool
Hi Carl,

On Fri, Jun 16, 2017 at 09:23:06AM -0700, Carl Love wrote:
>   * config/rs6000/rs6000-c.c (altivec_overloaded_builtins[]): Add
>   definitions for vec_float, vec_float2, vec_floato,
>   vec_floate built-ins.

No [], just the name.

>   * config/rs6000/rs6000-builtin.def (FLOAT2_V2DI, FLOATE_V2D*,
>   FLOATO_V2D*, XVCVSXWSP_V4SF, UNS_FLOATO_V2DI, UNS_FLOATE_V2DI): Add
>   definitions.

Please spell out FLOATE_V2DF, FLOATE_V2DI -- it's only two of-em, and
it makes things easier to find.

>   * config/altivec.md (define_insn "p8_vmrgew_",
>   define_mode_attr VF_sxddp):Add V4SF type to p8_vmrgew.

Space after colon.

>   * gcc.target/powerpc/builtins-3-runnable.c (test_result_sp(),
>   main()): Add runnable tests and test checker for vec_float,
>   vec_float2, vec_floate and vec_floato builtins.

No () please.

> +(define_insn "vsx_xvcvsxwsp"
> +  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
> + (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
> +  UNSPEC_VSX_CVSXWSP))]
> +  "VECTOR_UNIT_VSX_P (V4SFmode)"
> +  "xvcvsxwsp %x0,%x1"
> +  [(set_attr "type" "vecdouble")])

Hrm, is that the best type?  Maybe vecfloat is better.

> +;; Generate floate
> +;; convert  double or long long signed to float
> +;;(Only even words are valid, BE numbering)

Single space before double; space before (.

> +(define_expand "floato"
> + [(use (match_operand:V4SF 0 "register_operand" "=wa"))
> +  (use (match_operand:VSX_D 1 "register_operand" "wa"))]
> + "VECTOR_UNIT_VSX_P (V4SFmode)"

These last three lines should be indented one more space.

> +{
> +  if (VECTOR_ELT_ORDER_BIG)
> +emit_insn (gen_vsx_xvcvsp (operands[0], operands[1]));
> +  else
> +{
> +  /* Shift left one word to put odd word correct location */
> +  rtx rtx_tmp;
> +  rtx rtx_val = GEN_INT (4);
> +
> +  rtx_tmp = gen_reg_rtx (V4SFmode);
> +  emit_insn (gen_vsx_xvcvsp (rtx_tmp, operands[1]));
> +  emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
> + rtx_tmp, rtx_tmp, rtx_val));

This indent should use tabs.  There are more like this.

Okay with those last trivialities fixed.  Thanks!


Segher


[PATCH, rs6000] Fix vec_mulo and vec_mule instruction generation

2017-06-16 Thread Carl Love
GCC Maintainers:

The support for the vec_mulo and vec_mule has yet another bug.  For the
case of signed/unsigned integer arguments the builtin generates the half
word instruction not the word instruction.  This patch fixes the issue.
The fix has been tested and verified on powerpc64le-unknown-linux-gnu
(Power 8 LE).

Is the patch OK for gcc mainline?

  Carl Love


-
>From 3127a3f9c8480fde428c4a13bc37d6eaefd0edfe Mon Sep 17 00:00:00 2001
From: Carl Love 
Date: Fri, 16 Jun 2017 16:10:56 -0500
Subject: [PATCH] vec_mule, vec_mulo fix 2

gcc/ChangeLog:

2017-06-17  Carl Love  

* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
ALTIVEC_BUILTIN_VMULESW, ALTIVEC_BUILTIN_VMULEUW,
ALTIVEC_BUILTIN_VMULOSW, ALTIVEC_BUILTIN_VMULOUW entries.
* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin(),
builtin_function_type()): Add needed ALTIVEC_BUILTIN_* case
statements.
* config/rs6000/altivec.md (define_c_enum "unspec",
define_expand "vec_widen_umult_even_v4si",
define_expand "vec_widen_smult_even_v4si",
define_expand "vec_widen_umult_odd_v4si",
define_expand "vec_widen_smult_odd_v4si",
define_insn "altivec_vmuleuw", define_insn "altivec_vmulesw",
define_insn "altivec_vmulouw",  define_insn "altivec_vmulosw"): Add
support to generate vmuleuw, vmulesw, vmulouw, vmulosw instructions.
* config/rs6000/rs6000-builtin.def (VMULEUW, VMULESW, VMULOUW,
VMULOSW): Add definitions.
---
 gcc/config/rs6000/altivec.md | 91

 gcc/config/rs6000/rs6000-builtin.def |  8 
 gcc/config/rs6000/rs6000-c.c | 12 +++--
 gcc/config/rs6000/rs6000.c   |  6 +++
 4 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 487b9a4..142300a 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -36,10 +36,14 @@
UNSPEC_VMULESB
UNSPEC_VMULEUH
UNSPEC_VMULESH
+   UNSPEC_VMULEUW
+   UNSPEC_VMULESW
UNSPEC_VMULOUB
UNSPEC_VMULOSB
UNSPEC_VMULOUH
UNSPEC_VMULOSH
+   UNSPEC_VMULOUW
+   UNSPEC_VMULOSW
UNSPEC_VPKPX
UNSPEC_VPACK_SIGN_SIGN_SAT
UNSPEC_VPACK_SIGN_UNS_SAT
@@ -1412,6 +1416,32 @@
   DONE;
 })
 
+(define_expand "vec_widen_umult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+  else
+emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
+(define_expand "vec_widen_smult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+  else
+emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
 (define_expand "vec_widen_umult_odd_v16qi"
   [(use (match_operand:V8HI 0 "register_operand" ""))
(use (match_operand:V16QI 1 "register_operand" ""))
@@ -1464,6 +1494,32 @@
   DONE;
 })
 
+(define_expand "vec_widen_umult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+  else
+emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+  else
+emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
 (define_insn "altivec_vmuleub"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
 (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1536,6 +1592,41 @@
   "vmulosh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmuleuw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+(unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+  (match_operand:V4SI 2 "register_operand" "v")]
+UNSPEC_VMULEUW))]
+  "TARGET_ALTIVEC"
+  "vmuleuw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouw"
+  [(set 

Re: [PATCH, rev 2] PR target/79799, Add vec_insert of V4SFmode on PowerPC ISA 3.0 (power9)

2017-06-16 Thread Segher Boessenkool
On Fri, Jun 16, 2017 at 04:26:58PM -0400, Michael Meissner wrote:
> > > +  "&& reload_completed"
> > 
> > I still don't think it is such a good idea to do all of this not until
> > after reload.  It does of course allow you to play tricks with changing
> > register mode at will, like you do ;-)
> 
> The problem is MODES_TIEABLE_P.  V4S{I,F}mode and SImode cannot be tied
> together (i.e. use gen_lowpart to change the mode and use a SUBREG).  So after
> reload, we can just use gen_rtx_REG (...) to change the register type, but
> before reload, by creating the SUBREG, it can lead to various aborts if rtl
> checking is turned on.

That sounds like a problem elsewhere?  Hrm.

> > All these unspecs are a similar problem: the RTL optimisers cannot do
> > much at all with it.
> 
> I don't think there is a good way to represent a vec_insert.  And vec_extract
> can't represent a variable extract either.

Yeah.  But especially for all this lane shuffling etc. the generic
optimisers could do a good job, if only they knew how.  Maybe we need
some new RTL codes.

> > > +  [(set_attr "type" "vecperm")
> 
> > Is that a good type for this?  I think the convert is more expensive
> > than the permutes?  If so, that would be better (of course it only
> > matters for sched1, not super important).
> 
> I generally use the type of the last insn.  I am open to other suggestions.

It should describe the resulting insns as a whole.  Picking the type of
the most expensive insn is often a reasonable approximation; for integer
insns "two" or "three" can be okay.

I don't think we can do much better currently.


Segher


[PATCH rs6000] Fix for commit 249311

2017-06-16 Thread Carl Love
GCC maintainers:

Commit r249311 had an error.  During the patch review the define expand
for VFC_inst was changed to VF_sxddp.  I compiled and tested the source
after making the change and it seemed fine.  However, I missed a couple
of changes.  It seems that, since I didn't remove all the binaries before
recompiling, the build tree still had the old definition in it.  I also
found I had to move the VF_sxddp definition back to the file where it is
used.  Need to make sure I do a clean build just to be sure before
committing things.

I found the issue after pulling down a fresh tree and compiling when the
build failed.  I have already applied the following change to the tree
as I didn't want to leave a broken tree all weekend. Please let me know
if there are any changes to this fix-up patch that you would like to see
made and I will take care of it. 

Sorry about breaking things.

  Carl Love


gcc/ChangeLog:

2017-06-16  Carl Love  

* config/rs6000/altivec.md (define_mode_attr VF_sxddp): Move to vsx.md.
* config/rs6000/vsx.md (define_mode_attr VF_sxddp,
define_expand "floate",
define_expand "floato"): Add VF_sxddp definition, replace
undefined VFC_inst with VF_sxddp definition.


Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md(revision 249336)
+++ gcc/config/rs6000/vsx.md(revision 249337)
@@ -21,6 +21,9 @@
 ;; Iterator for comparison types
 (define_code_iterator CMP_TEST [eq lt gt unordered])
 
+;; Mode attribute for vector floate and floato conversions
+(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
+
 ;; Iterator for both scalar and vector floating point types supported by VSX
 (define_mode_iterator VSX_B [DF V4SF V2DF])
 
@@ -2056,7 +2059,7 @@
 rtx_tmp, rtx_tmp, rtx_val));
 }
   else
-emit_insn (gen_vsx_xvcvsp (operands[0], operands[1]));
+emit_insn (gen_vsx_xvcvsp (operands[0], operands[1]));
 
   DONE;
 })
@@ -2095,7 +2098,7 @@
   "VECTOR_UNIT_VSX_P (V4SFmode)"
 {
   if (VECTOR_ELT_ORDER_BIG)
-emit_insn (gen_vsx_xvcv<VFC_inst>sp (operands[0], operands[1]));
+emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
   else
 {
   /* Shift left one word to put odd word correct location */
@@ -2103,7 +2106,7 @@
   rtx rtx_val = GEN_INT (4);
 
   rtx_tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_vsx_xvcv<VFC_inst>sp (rtx_tmp, operands[1]));
+  emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
   emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
 rtx_tmp, rtx_tmp, rtx_val));
 }
Index: gcc/config/rs6000/altivec.md
===
--- gcc/config/rs6000/altivec.md(revision 249336)
+++ gcc/config/rs6000/altivec.md(revision 249337)
@@ -207,9 +207,6 @@
 ;; versus floating point
 (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")])
 
-;; Mode attribute for vector floate and floato conversions
-(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
-
 ;; Specific iterator for parity which does not have a byte/half-word form, but
 ;; does have a quad word form
 (define_mode_iterator VParity [V4SI




Re: [PATCH, rs6000] (v3) Fold vector shifts in GIMPLE

2017-06-16 Thread Richard Biener
On Thu, Jun 15, 2017 at 7:53 PM, Segher Boessenkool
 wrote:
> Hi Will,
>
> On Tue, Jun 13, 2017 at 02:44:45PM -0500, Will Schmidt wrote:
>> I sniff-tested the latest changes on Power8, with good results.  Full
>> regtest running.  OK for trunk?
>
> This looks good to me, if Richard is happy with it now.  Some very minor
> things and it is good to go:

Yes, looks good to me.

Richard.

>>   2017-06-13  Will Schmidt  
>>
>>   * config/rs6000/rs6000.c: Add include of ssa-propagate.h for
>>   update_call_from_tree().
>>   (rs6000_gimple_fold_builtin): Add handling
>>   for early expansion of vector shifts (sl,sr,sra,rl).
>
> Funny line break there.
>
>> +  /* Flavors of vector shift right algebraic.
>> +   * vec_sra{b,h,w} -> vsra{b,h,w}.  */
>
> No leading * in comments please (more of those below).
>
>> + /* convert arg0 to unsigned.  */
>
> Full sentences are capitalized.
>
>
> Segher


Re: [PATCH 07/30] [build] Make sbitmap code available to the driver programs

2017-06-16 Thread Richard Biener
On Wed, Jun 14, 2017 at 4:35 PM, Richard Earnshaw (lists)
 wrote:
> On 09/06/17 13:53, Richard Earnshaw wrote:
>>
>> The ARM option parsing code uses sbitmap data structures to manage
>> features and upcoming patches will shortly need to use these bitmaps
>> within the driver.  This patch moves sbitmap.o from OBJS to
>> OBJS-libcommon to facilitate this.
>>
>> The patch has no impact on targets that don't need this functionality,
>> since the object is part of an archive and will only be extracted if
>> needed.
>>
>>   * Makefile.in (OBJS): Move sbitmap.o from here ...
>>   (OBJS-libcommon): ... to here.
>
> It may have been missed given that most of this patch series is
> ARM-specific, that this patch and
> https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00629.html (patch 20) need
> review from a global maintainer/component maintainer.

Ok.

Richard.

> R.
>
>> ---
>>  gcc/Makefile.in | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>>
>> 0007-build-Make-sbitmap-code-available-to-the-driver-prog.patch
>>
>>
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index da98c8e..67d69c1 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -1441,7 +1441,6 @@ OBJS = \
>>   rtlanal.o \
>>   rtlhooks.o \
>>   run-rtl-passes.o \
>> - sbitmap.o \
>>   sched-deps.o \
>>   sched-ebb.o \
>>   sched-rgn.o \
>> @@ -1587,6 +1586,7 @@ OBJS = \
>>  OBJS-libcommon = diagnostic.o diagnostic-color.o diagnostic-show-locus.o \
>>   edit-context.o \
>>   pretty-print.o intl.o \
>> + sbitmap.o \
>>   vec.o input.o version.o hash-table.o ggc-none.o memory-block.o \
>>   selftest.o
>>
>>
>


Re: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.

2017-06-16 Thread Richard Sandiford
Tamar Christina  writes:
> Hi Richard,
>> > > +rtx tmp = gen_reg_rtx (SImode);
>> > > +aarch64_expand_mov_immediate (tmp, gen_int_mode (ival,
>> SImode));
>> > > +tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
>> >
>> > This looks wrong for big-endian, and...
>> >
>> > > +emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
>> >
>> > ...either it should be OK to go directly from tmp to the HFmode
>> > lowpart, or we should move the HImode temporary into a fresh REG.
>> > Current validate_subreg seems to suggest that we need the latter.
>> >
>> > Isn't it possible to use a HImode move immediate instead of an SImode
>> one?
>> 
>> We don't really have a movhi pattern; currently a movhi would end up
>> in the general mov_aarch64 pattern

movqi and movhi patterns are defined from the same mov template,
but they're still "proper" move patterns.

>> which would then end up using a w register as well.

Isn't that what you want though?  f16_mov_immediate_1.c is testing for:

/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, #?19520"   3 } } 
*/

> Also aarch64_expand_mov_immediate doesn't allow HImode moves, only SI and DI.

It doesn't need to, because all HImode CONST_INTs are already legitimate.
You can just use emit_move_insn instead.

FWIW, the following seems to pass the same tests and avoids the subreg
dance.  Just a proof of concept, and I'm not attached to the new
iterator name.

Thanks,
Richard


Index: gcc/gcc/config/aarch64/aarch64.md
===
--- gcc.orig/gcc/config/aarch64/aarch64.md
+++ gcc/gcc/config/aarch64/aarch64.md
@@ -1063,7 +1063,28 @@
   }
 )
 
-(define_insn_and_split "*movhf_aarch64"
+(define_split
+  [(set (match_operand:GPF_MOV_F16 0 "nonimmediate_operand")
+(match_operand:GPF_MOV_F16 1 "immediate_operand"))]
+  "TARGET_FLOAT
+   && can_create_pseudo_p ()
+   && !aarch64_can_const_movi_rtx_p (operands[1], mode)
+   && !aarch64_float_const_representable_p (operands[1])
+   && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  {
+unsigned HOST_WIDE_INT ival;
+if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+  FAIL;
+
+rtx tmp = gen_reg_rtx (mode);
+emit_move_insn (tmp, gen_int_mode (ival, mode));
+emit_move_insn (operands[0], gen_lowpart (mode, tmp));
+DONE;
+  }
+)
+
+(define_insn "*movhf_aarch64"
   [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  
,w,m,r,m ,r")
(match_operand:HF 1 "general_operand"  "Y ,?rY, 
w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
@@ -1080,28 +1101,12 @@
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
-  "&& can_create_pseudo_p ()
-   && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
-   && !aarch64_float_const_representable_p (operands[1])
-   &&  aarch64_float_const_rtx_p (operands[1])"
-  [(const_int 0)]
-  "{
-unsigned HOST_WIDE_INT ival;
-if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
-  FAIL;
-
-rtx tmp = gen_reg_rtx (SImode);
-aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
-tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
-emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
-DONE;
-  }"
   [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
 neon_move,f_loads,f_stores,load1,store1,mov_reg")
(set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
 )
 
-(define_insn_and_split "*movsf_aarch64"
+(define_insn "*movsf_aarch64"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  
,w,m,r,m ,r,r")
(match_operand:SF 1 "general_operand"  "Y ,?rY, 
w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
@@ -1119,28 +1124,13 @@
str\\t%w1, %0
mov\\t%w0, %w1
mov\\t%w0, %1"
-  "&& can_create_pseudo_p ()
-   && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
-   && !aarch64_float_const_representable_p (operands[1])
-   &&  aarch64_float_const_rtx_p (operands[1])"
-  [(const_int 0)]
-  "{
-unsigned HOST_WIDE_INT ival;
-if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
-  FAIL;
-
-rtx tmp = gen_reg_rtx (SImode);
-aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
-emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
-DONE;
-  }"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
 f_loads,f_stores,load1,store1,mov_reg,\
 fconsts")
(set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
 
-(define_insn_and_split "*movdf_aarch64"
+(define_insn "*movdf_aarch64"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  
,w,m,r,m ,r,r")
(match_operand:DF 1 "general_operand"  "Y , ?rY, 
w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
@@ -1158,21 +1148,6 @@
str\\t%x1, %0
mov\\t%x0, %x1
 

RE: [PATCH][GCC][AArch64] optimize float immediate moves (2 /4) - HF/DF/SF mode.

2017-06-16 Thread Tamar Christina
> 
> It doesn't need to, because all HImode CONST_INTs are already legitimate.
> You can just use emit_move_insn instead.
> 

Ah right, that's true.

> FWIW, the following seems to pass the same tests and avoids the subreg
> dance.  Just a proof of concept, and I'm not attached to the new iterator
> name.

Ah thanks! that is a bit simpler. I'll take a similar approach. 

> Thanks,
> Richard
> 
> 
> Index: gcc/gcc/config/aarch64/aarch64.md
> ==
> =
> --- gcc.orig/gcc/config/aarch64/aarch64.md
> +++ gcc/gcc/config/aarch64/aarch64.md
> @@ -1063,7 +1063,28 @@
>}
>  )
> 
> -(define_insn_and_split "*movhf_aarch64"
> +(define_split
> +  [(set (match_operand:GPF_MOV_F16 0 "nonimmediate_operand")
> +(match_operand:GPF_MOV_F16 1 "immediate_operand"))]
> +  "TARGET_FLOAT
> +   && can_create_pseudo_p ()
> +   && !aarch64_can_const_movi_rtx_p (operands[1], mode)
> +   && !aarch64_float_const_representable_p (operands[1])
> +   && aarch64_float_const_rtx_p (operands[1])"
> +  [(const_int 0)]
> +  {
> +unsigned HOST_WIDE_INT ival;
> +if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> +  FAIL;
> +
> +rtx tmp = gen_reg_rtx (mode);
> +emit_move_insn (tmp, gen_int_mode (ival, mode));
> +emit_move_insn (operands[0], gen_lowpart (mode, tmp));
> +DONE;
> +  }
> +)
> +
> +(define_insn "*movhf_aarch64"
>[(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w
> ,w,m,r,m ,r")
>   (match_operand:HF 1 "general_operand"  "Y ,?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r"))]
>"TARGET_FLOAT && (register_operand (operands[0], HFmode) @@ -
> 1080,28 +1101,12 @@
> ldrh\\t%w0, %1
> strh\\t%w1, %0
> mov\\t%w0, %w1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -unsigned HOST_WIDE_INT ival;
> -if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -  FAIL;
> -
> -rtx tmp = gen_reg_rtx (SImode);
> -aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> -emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> -DONE;
> -  }"
>[(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
>neon_move,f_loads,f_stores,load1,store1,mov_reg")
> (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
>  )
> 
> -(define_insn_and_split "*movsf_aarch64"
> +(define_insn "*movsf_aarch64"
>[(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w
> ,w,m,r,m ,r,r")
>   (match_operand:SF 1 "general_operand"  "Y ,?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
>"TARGET_FLOAT && (register_operand (operands[0], SFmode) @@ -
> 1119,28 +1124,13 @@
> str\\t%w1, %0
> mov\\t%w0, %w1
> mov\\t%w0, %1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -unsigned HOST_WIDE_INT ival;
> -if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -  FAIL;
> -
> -rtx tmp = gen_reg_rtx (SImode);
> -aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
> -DONE;
> -  }"
>[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
>f_loads,f_stores,load1,store1,mov_reg,\
>fconsts")
> (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
>  )
> 
> -(define_insn_and_split "*movdf_aarch64"
> +(define_insn "*movdf_aarch64"
>[(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w
> ,w,m,r,m ,r,r")
>   (match_operand:DF 1 "general_operand"  "Y , ?rY,
> w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
>"TARGET_FLOAT && (register_operand (operands[0], DFmode) @@ -
> 1158,21 +1148,6 @@
> str\\t%x1, %0
> mov\\t%x0, %x1
> mov\\t%x0, %1"
> -  "&& can_create_pseudo_p ()
> -   && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
> -   && !aarch64_float_const_representable_p (operands[1])
> -   &&  aarch64_float_const_rtx_p (operands[1])"
> -  [(const_int 0)]
> -  "{
> -unsigned HOST_WIDE_INT ival;
> -if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> -  FAIL;
> -
> -rtx tmp = gen_reg_rtx (DImode);
> -aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> -emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
> -DONE;
> -  }"
>[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
>f_loadd,f_stored,load1,store1,mov_reg,\
>fconstd")
> Index: gcc/gcc/config/aarch64/iterators.md
> ==
> =
> --- gcc.orig/gcc/config/aarch64/iterators.md
> +++ 

Re: [PING^3][RFC, PATCH][ASAN] Implement dynamic allocas/VLAs sanitization.​

2017-06-16 Thread Richard Biener
On Wed, 14 Jun 2017, Jakub Jelinek wrote:

> Hi!
> 
> So, I've tried to look at
> struct __attribute__((aligned (N))) S { char s[N]; };
> 
> void bar (struct S *, struct S *);
> 
> void
> foo (int x)
> {
>   struct S a;
>   {
> struct S b[x];
> bar (&a, &b[0]);
>   }
>   {
> struct S b[x + 4];
> bar (&a, &b[0]);
>   }
> }
> 
> void
> baz (int x)
> {
>   struct S a;
>   struct S b[x];
>   bar (&a, &b[0]);
> }
> testcase at -O2 -fsanitize=address -DN=64 (and -DN=8) on x86_64.
> Even in *.optimized dump I'm seeing:
>   _1 = (sizetype) x_4(D);
>   # RANGE [0, 18446744073709551552] NONZERO 18446744073709551552
>   _2 = _1 * 64;
>   # RANGE [0, 31] NONZERO 31
>   _24 = _2 & 31;
>   # RANGE ~[65, 127]
>   _19 = _2 + 128;
>   # RANGE ~[65, 96]
>   _27 = _19 - _24;
>   _28 = __builtin_alloca_with_align (_27, 512);
>   _29 = _28 + 64;
>   __builtin___asan_alloca_poison (_29, _2);
> which seems to be unnecessarily complicated: as _2 has a nonzero
> mask of 0xffffffffffffffc0, trying to AND it with 0x1f should
> certainly yield _24 = 0, and thus there is no need to subtract anything.
> 
> I wonder if this is just because the asan1 pass is fairly late and say
> ccp isn't scheduled after it.  The question is if trying to use
> gimple_build APIs instead of gimple_build_assign would help here
> (and whether we'd need some new match.pd rules to figure out
> that if you have SSA_NAME & constant and get_nonzero_bits on the
> SSA_NAME & constant is 0, then the result is 0) or not.

The gimple_build API at the moment mirrors the behavior of building
a large GENERIC expr which means it will only match-and-simplify
stmts currently building (actually not yet associated with any BB).

So if you build _2 & 31 you get that expr folded with match.pd rules
(not sure if there is any yet doing the desired simplification to _2
using get_nonzero_bits).

If you are building a stmt at a time folding built stmts would
get you the same result (but then using the gimple_build helpers
is more powerful)

> Or you could just try to check get_nonzero_bits yourself and if
> all the bits you want to mask are clear, avoid the subtraction.
> 
> Also, isn't the size used for the adjusted __builtin_alloca_with_align
> too large?  If you need _2 initially, and alignment is 64 bytes,
> then you certainly need 64 bytes before (unless we want to go into too
> low-level backend details and say that we want to allocate ret + 32
> as 64-byte aligned), but 64 bytes after it is too much, 32 bytes would be
> enough (there is no partial right zone in this case)?
> 
> On Wed, Jun 14, 2017 at 04:21:48PM +0300, Maxim Ostapenko wrote:
> > +static void
> > +handle_builtin_alloca (gcall *call, gimple_stmt_iterator *iter)
> > +{
> > +  if (!iter)
> > +return;
> > +
> > +  gimple_seq seq = NULL;
> > +  gassign *g;
> > +  gcall *gg;
> > +  gimple_stmt_iterator gsi = *iter;
> > +  const HOST_WIDE_INT redzone_mask = ASAN_RED_ZONE_SIZE - 1;
> > +
> > +  tree last_alloca_addr = get_last_alloca_addr ();
> > +  tree callee = gimple_call_fndecl (call);
> > +  tree old_size = gimple_call_arg (call, 0);
> > +  tree ptr_type = gimple_call_lhs (call) ? TREE_TYPE (gimple_call_lhs 
> > (call))
> > +: ptr_type_node;
> > +  bool alloca_with_align
> > += DECL_FUNCTION_CODE (callee) == BUILT_IN_ALLOCA_WITH_ALIGN;
> > +  unsigned int align
> > += alloca_with_align ? tree_to_uhwi (gimple_call_arg (call, 1)) : 0;
> > +
> > +  /* If ALIGN > ASAN_RED_ZONE_SIZE, we embed left redzone into first ALIGN
> > + bytes of allocated space.  */
> > +  align = MAX (align, ASAN_RED_ZONE_SIZE * BITS_PER_UNIT);
> > +
> > +  tree alloca_rz_mask = build_int_cst (size_type_node, redzone_mask);
> > +  tree redzone_size = build_int_cst (size_type_node, ASAN_RED_ZONE_SIZE);
> > +
> > +  /* misalign = size & (ASAN_RED_ZONE_SIZE - 1)
> > + partial_size = ASAN_RED_ZONE_SIZE - misalign.  */
> > +  g = gimple_build_assign (make_ssa_name (size_type_node, NULL), 
> > BIT_AND_EXPR,
> > +  old_size, alloca_rz_mask);
> > +  gimple_seq_add_stmt_without_update (&seq, g);
> > +  tree misalign = gimple_assign_lhs (g);
> > +  g = gimple_build_assign (make_ssa_name (size_type_node, NULL), 
> > MINUS_EXPR,
> > +  redzone_size, misalign);
> > +  gimple_seq_add_stmt_without_update (&seq, g);
> > +  tree partial_size = gimple_assign_lhs (g);
> > +
> > +  /* padding = align + ASAN_RED_ZONE_SIZE;
> > + additional_size = padding + partial_size.  */
> > +  tree padding = build_int_cst (size_type_node,
> > +   align / BITS_PER_UNIT + ASAN_RED_ZONE_SIZE);
> > +  g = gimple_build_assign (make_ssa_name (size_type_node), PLUS_EXPR,
> > +  partial_size, padding);
> > +  gimple_seq_add_stmt_without_update (&seq, g);
> > +  tree additional_size = gimple_assign_lhs (g);
> > +
> > +  /* new_size = old_size + additional_size.  */
> > +  g = gimple_build_assign (make_ssa_name (size_type_node), PLUS_EXPR, 
> > old_size,
> 

Re: [PATCH, ARM] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature macro

2017-06-16 Thread Prakhar Bahuguna
On 15/06/2017 17:23:43, Richard Earnshaw (lists) wrote:
> On 14/06/17 10:35, Prakhar Bahuguna wrote:
> > The ARM ACLE defines the __ARM_FEATURE_COPROC macro which indicates which
> > coprocessor intrinsics are available for the target. If 
> > __ARM_FEATURE_COPROC is
> > undefined, the target does not support coprocessor intrinsics. The feature
> > levels are defined as follows:
> > 
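> > +---------+-----------+--------------------------------------------------+
> > | **Bit** | **Value** | **Intrinsics Available**                         |
> > +---------+-----------+--------------------------------------------------+
> > | 0       | 0x1       | __arm_cdp, __arm_ldc, __arm_ldcl, __arm_stc,     |
> > |         |           | __arm_stcl, __arm_mcr and __arm_mrc              |
> > +---------+-----------+--------------------------------------------------+
> > | 1       | 0x2       | __arm_cdp2, __arm_ldc2, __arm_stc2, __arm_ldc2l, |
> > |         |           | __arm_stc2l, __arm_mcr2 and __arm_mrc2           |
> > +---------+-----------+--------------------------------------------------+
> > | 2       | 0x4       | __arm_mcrr and __arm_mrrc                        |
> > +---------+-----------+--------------------------------------------------+
> > | 3       | 0x8       | __arm_mcrr2 and __arm_mrrc2                      |
> > +---------+-----------+--------------------------------------------------+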
> > 
> > This patch implements full support for this feature macro as defined in 
> > section
> > 5.9 of the ACLE
> > (https://developer.arm.com/products/software-development-tools/compilers/arm-compiler-5/docs/101028/latest/5-feature-test-macros).
> > 
> > gcc/ChangeLog:
> > 
> > 2017-06-14  Prakhar Bahuguna  
> > 
> > * config/arm/arm-c.c (arm_cpu_builtins): New block to define
> >  __ARM_FEATURE_COPROC according to support.
> > 
> > 2017-06-14  Prakhar Bahuguna  
> > * gcc/testsuite/gcc.target/arm/acle/cdp.c: Add feature macro bitmap
> > test.
> > * gcc/testsuite/gcc.target/arm/acle/cdp2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/ldc.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/ldc2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/ldc2l.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/ldcl.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mcr.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mcr2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mcrr.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mcrr2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mrc.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mrc2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mrrc.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/mrrc2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/stc.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/stc2.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/stc2l.c: Likewise.
> > * gcc/testsuite/gcc.target/arm/acle/stcl.c: Likewise.
> > 
> > Testing done: ACLE regression tests updated with tests for feature macro 
> > bits.
> > All regression tests pass.
> > 
> > Okay for trunk?
> > 
> > 
> > 0001-Implement-__ARM_FEATURE_COPROC-coprocessor-intrinsic.patch
> > 
> > 
> > From 79d71aec9d2bdee936b240ae49368ff5f8d8fc48 Mon Sep 17 00:00:00 2001
> > From: Prakhar Bahuguna 
> > Date: Tue, 2 May 2017 13:43:40 +0100
> > Subject: [PATCH] Implement __ARM_FEATURE_COPROC coprocessor intrinsic 
> > feature
> >  macro
> > 
> > ---
> >  gcc/config/arm/arm-c.c| 19 +++
> >  gcc/testsuite/gcc.target/arm/acle/cdp.c   |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/cdp2.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/ldc.c   |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/ldc2.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/ldc2l.c |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/ldcl.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mcr.c   |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mcr2.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mcrr.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mcrr2.c |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mrc.c   |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mrc2.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mrrc.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/mrrc2.c |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/stc.c   |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/stc2.c  |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/stc2l.c |  3 +++
> >  gcc/testsuite/gcc.target/arm/acle/stcl.c  |  3 +++
> >  19 files changed, 73 insertions(+)
> > 
> > diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
> > index 3abe7d1f1f5..3daf4e5e1f3 100644
> > --- a/gcc/config/arm/arm-c.c
> > +++ b/gcc/config/arm/arm-c.c
> > @@ -200,6 +200,25 @@ arm_cpu_builtins (struct cpp_reader* pfile)
> >

Re: [PATCH, rs6000] (v2) Fold vector shifts in GIMPLE

2017-06-16 Thread Richard Biener
On Wed, Jun 14, 2017 at 4:55 PM, Will Schmidt  wrote:
> On Tue, 2017-06-13 at 10:03 +0200, Richard Biener wrote:
>> On Mon, Jun 12, 2017 at 11:56 PM, Will Schmidt
>>  wrote:
>> > Hi,
>> >
>> >
>> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> > index 63ca2d1..55592fb 100644
>> > --- a/gcc/config/rs6000/rs6000.c
>> > +++ b/gcc/config/rs6000/rs6000.c
>> > @@ -16588,6 +16588,83 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
>> > *gsi)
>> > gsi_replace (gsi, g, true);
>> > return true;
>> >}
> 
>> > +/* Flavors of vector shift right.  */
>> > +case ALTIVEC_BUILTIN_VSRB:
>> > +case ALTIVEC_BUILTIN_VSRH:
>> > +case ALTIVEC_BUILTIN_VSRW:
>> > +case P8V_BUILTIN_VSRD:
>> > +  {
>> > +   arg0 = gimple_call_arg (stmt, 0);
>> > +   arg1 = gimple_call_arg (stmt, 1);
>> > +   lhs = gimple_call_lhs (stmt);
>> > +   gimple *g;
>> > +   /* convert arg0 to unsigned.  */
>> > +   arg0 = convert (unsigned_type_for (TREE_TYPE (arg0)), arg0);
>>
>> Please do not use 'convert', instead do ...
>
> Hi Richard,
>
> V3 of this patch, using the gimple_build() convenience helper function
> has been posted, and is the direction I'm going for with this patch.  I
> wanted to make sure I fully understood the other options though, so I
> have a question/clarification on the other suggestions:
>
>> > +   tree arg0_uns = create_tmp_reg_or_ssa_name
>> > +  (unsigned_type_for (TREE_TYPE (arg0)));
>> > +   g = gimple_build_assign (arg0_uns, arg0);
>>
>>g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR, unsigned_type, arg0);
>
> I tried a few trivial variations of this:
> g = gimple_build_assign (arg0_uns, VIEW_CONVERT_EXPR,
>  unsigned_type_for (TREE_TYPE(arg0_uns)), arg0);
>
> which looked good, but it asserts in gimple_build_assign_1(), on the
> check
> "if (op2)
>  {
>  gcc_assert (num_ops > 2);
> ...
>
> Trolling around the other code for references, i found and tried this,
> which uses the build1() helper, and appears to work.  Is this the gist
> of what you suggested, or would there be another alternative?
>
>g = gimple_build_assign (arg0_uns,
>   build1(VIEW_CONVERT_EXPR,
>  unsigned_type_for (TREE_TYPE(arg0_uns)), arg0));
>
> Thanks for the feedback, etc.  :-)

Yeah, sorry -- the gimple_build machinery handles this GIMPLE wart
transparently but gimple_build_assign does not ...

Richard.

> -Will
>
>
>> You also want to avoid spitting out useless copies here if the
>> arg/result is already unsigned,
>> like via
>>
>> tree arg0_uns = arg0;
>> if (! TYPE_UNSIGNED (TREE_TYPE (arg0_uns)))
>>  {
>> ...
>>  }
>>
>> > +   gimple_set_location (g, gimple_location (stmt));
>> > +   gsi_insert_before (gsi, g, GSI_SAME_STMT);
>> > +   /* convert lhs to unsigned and do the shift.  */
>>
>> Just use lhs if it has the same sign as arg0_uns.
>>
>> > +   tree lhs_uns = create_tmp_reg_or_ssa_name
>> > +  (unsigned_type_for (TREE_TYPE (lhs)));
>>
>> You can re-use the type of arg0_uns here.
>>
>> > +   g = gimple_build_assign (lhs_uns, RSHIFT_EXPR, arg0_uns, arg1);
>> > +   gimple_set_location (g, gimple_location (stmt));
>> > +   gsi_insert_before (gsi, g, GSI_SAME_STMT);
>> > +   /* convert lhs back to a signed type for the return.  */
>> > +   lhs_uns = convert (signed_type_for (TREE_TYPE (lhs)),lhs_uns);
>> > +   g = gimple_build_assign (lhs, lhs_uns);
>>
>> See above for how to perform the conversion.
>>
>> Note that you could use the gimple_build convenience to shorten the code
>> sequence above to
>>
>> gimple_seq stmts = NULL;
>> tree arg0_unsigned = gimple_build (&stmts, VIEW_CONVERT_EXPR,
>>
>> unsigned_type_for (...), arg0);
>> tree res = gimple_build (&stmts, RSHIFT_EXPR, TREE_TYPE (arg0_uns),
>>arg0_uns, arg1);
>> res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
>> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>> update_call_from_tree (gsi, res);
>>
>> The above gimple_build sequence will fold all the stmts thus remove
>> useless conversions and apply constant folding, etc.
>>
>> Richard.
>>
>> > +   gimple_set_location (g, gimple_location (stmt));
>> > +   gsi_replace (gsi, g, true);
>> > +   return true;
>> > +  }
>> >  default:
>> >break;
>> >  }
>
>
>


Re: [Patch ARM] Fix PR71778

2017-06-16 Thread James Greenhalgh

On Wed, Jun 14, 2017 at 11:21:30AM +0100, Kyrill Tkachov wrote:

  <...>

> That movv2di expander is the one in vec-common.md that ends up calling
> neon_make_constant. I wonder why const0_rtx passed its predicate check
> (that would require a V2DImode vector of zeroes rather than a const0_rtx).
> Perhaps the midend code at this point doesn't check the operand predicate.
>
> In the builtin expansion code that you quoted I wonder if we could fail
> more gracefully by returning CONST0_RTX (mode[argc]) to match the expected
> mode of the operand (we've already emitted an error, so we shouldn't care
> what RTL we emit as long as it doesn't cause an ICE).

  <...>

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index e503891..b8d59c6 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -12124,6 +12124,11 @@ neon_make_constant (rtx vals)
>if (n_const == n_elts)
>   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
>  }
> +  else if (vals == const0_rtx)
> +/* Something invalid, perhaps from expanding an intrinsic
> +   which requires a constant argument, where a variable argument
> +   was passed.  */
> + return const0_rtx;
>else
>  gcc_unreachable ();
>
> I'm not a fan of this as the function has a precondition that its argument is
> a PARALLEL or a CONST_VECTOR and special-casing const0_rtx breaks that. I'd
> rather we tried fixing this closer to the error source.  Can you try the
> suggestion above instead please?

Your suggestion doesn't quite work, but this is pretty close to it. Rather
than try to guess at the correct mode for CONST0_RTX (we can't just use
mode[argc] as that will get you the scalar mode), we can just return target
directly. That will ensure we've given something valid back in the correct
mode, even if it is not all that useful.

Bootstrapped on arm-none-linux-gnueabihf. OK?

Thanks,
James

---
gcc/

2017-06-15  James Greenhalgh  

PR target/71778
* config/arm/arm-builtins.c (arm_expand_builtin_args): Return TARGET
if given a non-constant argument for an intrinsic which requires a
constant.

gcc/testsuite/

2017-06-15  James Greenhalgh  

PR target/71778
* gcc.target/arm/pr71778.c: New.

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index a0569ed..8ecf581 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -2245,7 +2245,12 @@ constant_arg:
 		{
 		  error ("%Kargument %d must be a constant immediate",
 			 exp, argc + 1);
-		  return const0_rtx;
+		  /* We have failed to expand the pattern, and are safely
+		 in to invalid code.  But the mid-end will still try to
+		 build an assignment for this node while it expands,
+		 before stopping for the error, just pass it back
+		 TARGET to ensure a valid assignment.  */
+		  return target;
 		}
 	  break;
 
diff --git a/gcc/testsuite/gcc.target/arm/pr71778.c b/gcc/testsuite/gcc.target/arm/pr71778.c
new file mode 100644
index 000..d5b0d04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr71778.c
@@ -0,0 +1,24 @@
+/* { dg-do compile }  */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+typedef __simd128_int32_t int32x4_t;
+
+__extension__ extern __inline int32x4_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vshrq_n_s32 (int32x4_t __a, const int __b)
+{
+  /* Errors for arm_neon.h intrinsics using constants end up on the line
+ in arm_neon.h rather than the source file line.  That means we
+ need to put the dg-error up here, rather than on line 22 where we'd
+ like it.  */
+  return (int32x4_t)__builtin_neon_vshrs_nv4si (__a, __b); /* { dg-error "argument 2 must be a constant immediate" } */
+}
+
+int32x4_t
+shift (int32x4_t a, int b)
+{
+  return vshrq_n_s32 (a, b);
+}
+


Re: [PATCH] warn on mem calls modifying objects of non-trivial types (PR 80560)

2017-06-16 Thread Richard Biener
On Thu, Jun 15, 2017 at 11:31 PM, Jason Merrill  wrote:
> On Thu, Jun 15, 2017 at 12:26 PM, Martin Sebor  wrote:
>> On 06/12/2017 03:36 PM, Jason Merrill wrote:
>>>
>>> On 06/08/2017 01:25 PM, Martin Sebor wrote:

 +  if (TREE_CHAIN (DECL_ARGUMENTS (f)))
 +{
 +  /* Skip constructors that aren't copy or move ctors.  */
 +  if (!copy_fn_p (f))
 +continue;
 +
 +  cpy_or_move_ctor_p = true;
 +}
 +  else
 +{
 +  /* Constructor is a default ctor.  */
 +  cpy_or_move_ctor_p = false;
 +}
>>>
>>> A default constructor can have parameters, so long as they have default
>>> arguments.  You can use default_ctor_p to test for a default constructor.
>>
>> Thank you for the suggestion.  Attached is an incremental diff
>> with this tweak plus a test for it.
>>
>> The code above has been there in the last three revisions of
>> the patch
>
> Yeah, I don't always notice everything :)
>
>> are there any other changes you'd like me to make?
>
> No, the patch is OK with this change.

This broke build with GCC 4.8 as host compiler:

g++ -fno-PIE -c   -g  -DIN_GCC -fno-exceptions -fno-rtti
-fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings
-Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic
-Wno-long-long -Wno-variadic-macros -Wno-overlength-strings
-fno-common  -DHAVE_CONFIG_H -I. -I.
-I/space/rguenther/src/svn/early-lto-debug/gcc
-I/space/rguenther/src/svn/early-lto-debug/gcc/.
-I/space/rguenther/src/svn/early-lto-debug/gcc/../include
-I/space/rguenther/src/svn/early-lto-debug/gcc/../libcpp/include
-I/space/rguenther/src/svn/early-lto-debug/gcc/../libdecnumber
-I/space/rguenther/src/svn/early-lto-debug/gcc/../libdecnumber/bid
-I../libdecnumber
-I/space/rguenther/src/svn/early-lto-debug/gcc/../libbacktrace   -o
tree-switch-conversion.o -MT tree-switch-conversion.o -MMD -MP -MF
./.deps/tree-switch-conversion.TPo
/space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c
/space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:
In function ‘void emit_case_bit_tests(gswitch*, tree, tree, tree,
tree)’:
/space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:271:53:
warning: missing initializer for member ‘case_bit_test::mask’
[-Wmissing-field-initializers]
   struct case_bit_test test[MAX_CASE_BIT_TESTS] = { };
 ^
...
/space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:271:53:
internal compiler error: in gimplify_init_constructor, at
gimplify.c:4271
Please submit a full bug report,
with preprocessed source if appropriate.
See  for instructions.
Makefile:1102: recipe for target 'tree-switch-conversion.o' failed
make: *** [tree-switch-conversion.o] Error 1

Please fix.

Richard.

> Jason


Re: Avoid generating useless range info

2017-06-16 Thread Richard Biener
On Wed, Jun 14, 2017 at 6:41 PM, Aldy Hernandez  wrote:
> Hi!
>
> As discovered in my range class work, we seem to generate a significant
> amount of useless range info out of VRP.
>
> Is there any reason why we can't avoid generating any range info that spans
> the entire domain, and yet contains nothing in the non-zero bitmask?
>
> The attached patch passes bootstrap, and the one regression it causes is
> because now the -Walloca-larger-than= pass is better able to determine that
> there is no range information at all, and the testcase is unbounded.
> So...win, win.
>
> OK for trunk?

Can you please do this in set_range_info itself?  Thus, if min ==
wi::min_value && max == wi::max_value
simply return?  (do not use TYPE_MIN?MAX_VALUE please)

Thanks,
Richard.

> Aldy


Re: [Patch, Fortran, F03] PR 80983: memory leak when calling procedure-pointer component with allocatable result

2017-06-16 Thread Christophe Lyon
Hi,

On 15 June 2017 at 23:18, Janus Weil  wrote:
> 2017-06-15 15:33 GMT+02:00 Thomas Koenig :
>> Hi Janus,
>>
>>> It regtests cleanly on x86_64-linux-gnu. Ok for trunk?
>>
>>
>> OK.
>>
>> Thanks for the patch!
>
> Thanks, Thomas! Committed as r249227.
>

There's a problem with the new test. It says:
gfortran.dg/proc_ptr_comp_51.f90   -O  : dump file does not exist
UNRESOLVED: gfortran.dg/proc_ptr_comp_51.f90   -O
scan-tree-dump-times original "__builtin_free" 3

You probably either want to add:
! { dg-options "-fdump-tree-original" }
or to remove:
! { dg-final { scan-tree-dump-times "__builtin_free" 3 "original" } }

Thanks,

Christophe

> Cheers,
> Janus


Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-16 Thread James Greenhalgh

On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> On Mon, 12 Jun 2017, James Greenhalgh wrote:
>
> >
> > Hi,
> >
> > As subject, for the testcase in the patch:
> >
> >   unsigned long
> >   f2 (unsigned long a, int b)
> >   {
> > unsigned long x = 1UL << b;
> > return a / x;
> >   }
> >
> > We currently generate:
> >
> >   f2:
> > mov x2, 1
> > lsl x1, x2, x1
> > udiv x0, x0, x1
> > ret
> >
> > Which could instead be transformed to:
> >
> >   f2:
> > lsr x0, x0, x1
> > ret
> >
> > OK?
>
> +   We can't do the same for signed A, as it might be negative, which
> would
> +   introduce undefined behaviour.  */
>
> huh, AFAIR it is _left_ shift of negative values that invokes
> undefined behavior.

You're right this is not a clear comment. The problem is not undefined
behaviour, so that text needs to go, but rounding towards/away from zero
for signed negative values. Division will round towards zero, arithmetic
right shift away from zero. For example in:

-1 / (1 << 1)   !=   -1 >> 1
  = -1 / 2
  = 0                  = -1

I've rewritten the comment to make it clear this is why we can only make
this optimisation for unsigned values.

See, for example, gcc.c-torture/execute/pr34070-2.c

> Note that as you are accepting vectors you need to make sure the
> target actually supports arithmetic right shift of vectors
> (you only know it supports left shift and division -- so it might
> be sort-of-superfluous to check in case there is no arch that supports
> those but not the other).

I've added a check for that using optabs, is that the right way to do this?

Bootstrapped and tested on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
gcc/

2017-06-13  James Greenhalgh  

* match.pd (A / (1 << B) -> A >> B): New.
* generic-match-head.c: Include optabs-tree.h.
* gimple-match-head.c: Likewise.

gcc/testsuite/

2017-06-13  James Greenhalgh  

* gcc.dg/tree-ssa/forwprop-37.c: New.

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..2bea268 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (op @0 integer_onep)
 (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if (TYPE_UNSIGNED (type)
+  && (!VECTOR_TYPE_P (type)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
undefined behavior in constexpr evaluation, and assuming that the division
traps enables better optimizations than these anyway.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */


Re: [PATCH] Instrument aggregate call arguments even with -fsanitize=object-size (PR sanitizer/81094)

2017-06-16 Thread Richard Biener
On Wed, 14 Jun 2017, Jakub Jelinek wrote:

> Hi!
> 
> -fsanitize=object-size is yet another sanitization that ignored aggregate
> function arguments.  Fixed thusly (plus some small cleanup for
> instrument_null), bootstrapped/regtested on x86_64-linux and i686-linux, ok
> for trunk?

Ok.

Richard.

> 2017-06-14  Jakub Jelinek  
> 
>   PR sanitizer/81094
>   * ubsan.c (instrument_null): Add T argument, use it instead
>   of computing it based on IS_LHS.
>   (instrument_object_size): Likewise.
>   (pass_ubsan::execute): Adjust instrument_null and
>   instrument_object_size callers to pass gimple_get_lhs or
>   gimple_assign_rhs1 result to it.  Use instrument_null instead of
>   calling get_base_address and instrument_mem_ref.  Handle
>   aggregate call arguments for object-size sanitization.
> 
>   * c-c++-common/ubsan/object-size-11.c: New test.
> 
> --- gcc/ubsan.c.jj2017-06-14 14:40:39.0 +0200
> +++ gcc/ubsan.c   2017-06-14 14:49:17.702131958 +0200
> @@ -1204,10 +1204,8 @@ instrument_mem_ref (tree mem, tree base,
>  /* Perform the pointer instrumentation.  */
>  
>  static void
> -instrument_null (gimple_stmt_iterator gsi, bool is_lhs)
> +instrument_null (gimple_stmt_iterator gsi, tree t, bool is_lhs)
>  {
> -  gimple *stmt = gsi_stmt (gsi);
> -  tree t = is_lhs ? gimple_get_lhs (stmt) : gimple_assign_rhs1 (stmt);
>/* Handle also e.g. >i.  */
>if (TREE_CODE (t) == ADDR_EXPR)
>  t = TREE_OPERAND (t, 0);
> @@ -1754,11 +1752,10 @@ instrument_nonnull_return (gimple_stmt_i
> points to an out-of-bounds location.  */
>  
>  static void
> -instrument_object_size (gimple_stmt_iterator *gsi, bool is_lhs)
> +instrument_object_size (gimple_stmt_iterator *gsi, tree t, bool is_lhs)
>  {
>gimple *stmt = gsi_stmt (*gsi);
>location_t loc = gimple_location (stmt);
> -  tree t = is_lhs ? gimple_get_lhs (stmt) : gimple_assign_rhs1 (stmt);
>tree type;
>tree index = NULL_TREE;
>HOST_WIDE_INT size_in_bytes;
> @@ -1989,9 +1986,9 @@ pass_ubsan::execute (function *fun)
> if (sanitize_flags_p (SANITIZE_NULL | SANITIZE_ALIGNMENT, fun->decl))
>   {
> if (gimple_store_p (stmt))
> - instrument_null (gsi, true);
> + instrument_null (gsi, gimple_get_lhs (stmt), true);
> if (gimple_assign_single_p (stmt))
> - instrument_null (gsi, false);
> + instrument_null (gsi, gimple_assign_rhs1 (stmt), false);
> if (is_gimple_call (stmt))
>   {
> unsigned args_num = gimple_call_num_args (stmt);
> @@ -2000,10 +1997,7 @@ pass_ubsan::execute (function *fun)
> tree arg = gimple_call_arg (stmt, i);
> if (is_gimple_reg (arg) || is_gimple_min_invariant (arg))
>   continue;
> -   tree base = get_base_address (arg);
> -   if (TREE_CODE (base) == MEM_REF
> -   && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
> - instrument_mem_ref (arg, base, , false);
> +   instrument_null (gsi, arg, false);
>   }
>   }
>   }
> @@ -2033,9 +2027,21 @@ pass_ubsan::execute (function *fun)
> if (sanitize_flags_p (SANITIZE_OBJECT_SIZE, fun->decl))
>   {
> if (gimple_store_p (stmt))
> - instrument_object_size (, true);
> + instrument_object_size (, gimple_get_lhs (stmt), true);
> if (gimple_assign_load_p (stmt))
> - instrument_object_size (, false);
> + instrument_object_size (, gimple_assign_rhs1 (stmt),
> + false);
> +   if (is_gimple_call (stmt))
> + {
> +   unsigned args_num = gimple_call_num_args (stmt);
> +   for (unsigned i = 0; i < args_num; ++i)
> + {
> +   tree arg = gimple_call_arg (stmt, i);
> +   if (is_gimple_reg (arg) || is_gimple_min_invariant (arg))
> + continue;
> +   instrument_object_size (, arg, false);
> + }
> + }
>   }
>  
> gsi_next ();
> --- gcc/testsuite/c-c++-common/ubsan/object-size-11.c.jj  2017-06-14 
> 16:16:43.192137010 +0200
> +++ gcc/testsuite/c-c++-common/ubsan/object-size-11.c 2017-06-14 
> 16:16:22.0 +0200
> @@ -0,0 +1,53 @@
> +/* PR sanitizer/81094 */
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
> +/* { dg-options "-fsanitize=object-size" } */
> +
> +#define N 20
> +
> +struct S { int i; };
> +
> +__attribute__((noinline, noclone)) void
> +f0 (struct S s)
> +{
> +  asm volatile ("" : : "r" (s.i) : "memory");
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f1 (int i)
> +{
> +  char *orig;
> +  struct S *p;
> +  orig = (char *) __builtin_calloc (N, sizeof (struct S));
> +  p = (struct S 

Re: [PATCH] warn on mem calls modifying objects of non-trivial types (PR 80560)

2017-06-16 Thread Richard Biener
On Fri, Jun 16, 2017 at 9:38 AM, Richard Biener
 wrote:
> On Thu, Jun 15, 2017 at 11:31 PM, Jason Merrill  wrote:
>> On Thu, Jun 15, 2017 at 12:26 PM, Martin Sebor  wrote:
>>> On 06/12/2017 03:36 PM, Jason Merrill wrote:

 On 06/08/2017 01:25 PM, Martin Sebor wrote:
>
> +  if (TREE_CHAIN (DECL_ARGUMENTS (f)))
> +{
> +  /* Skip constructors that aren't copy or move ctors.  */
> +  if (!copy_fn_p (f))
> +continue;
> +
> +  cpy_or_move_ctor_p = true;
> +}
> +  else
> +{
> +  /* Constructor is a default ctor.  */
> +  cpy_or_move_ctor_p = false;
> +}

 A default constructor can have parameters, so long as they have default
 arguments.  You can use default_ctor_p to test for a default constructor.
>>>
>>> Thank you for the suggestion.  Attached is an incremental diff
>>> with this tweak plus a test for it.
>>>
>>> The code above has been there in the last three revisions of
>>> the patch
>>
>> Yeah, I don't always notice everything :)
>>
>>> are there any other changes you'd like me to make?
>>
>> No, the patch is OK with this change.
>
> This broke build with GCC 4.8 as host compiler:
>
> g++ -fno-PIE -c   -g  -DIN_GCC -fno-exceptions -fno-rtti
> -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings
> -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic
> -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings
> -fno-common  -DHAVE_CONFIG_H -I. -I.
> -I/space/rguenther/src/svn/early-lto-debug/gcc
> -I/space/rguenther/src/svn/early-lto-debug/gcc/.
> -I/space/rguenther/src/svn/early-lto-debug/gcc/../include
> -I/space/rguenther/src/svn/early-lto-debug/gcc/../libcpp/include
> -I/space/rguenther/src/svn/early-lto-debug/gcc/../libdecnumber
> -I/space/rguenther/src/svn/early-lto-debug/gcc/../libdecnumber/bid
> -I../libdecnumber
> -I/space/rguenther/src/svn/early-lto-debug/gcc/../libbacktrace   -o
> tree-switch-conversion.o -MT tree-switch-conversion.o -MMD -MP -MF
> ./.deps/tree-switch-conversion.TPo
> /space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c
> /space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:
> In function ‘void emit_case_bit_tests(gswitch*, tree, tree, tree,
> tree)’:
> /space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:271:53:
> warning: missing initializer for member ‘case_bit_test::mask’
> [-Wmissing-field-initializers]
>struct case_bit_test test[MAX_CASE_BIT_TESTS] = { };
>  ^
> ...
> /space/rguenther/src/svn/early-lto-debug/gcc/tree-switch-conversion.c:271:53:
> internal compiler error: in gimplify_init_constructor, at
> gimplify.c:4271
> Please submit a full bug report,
> with preprocessed source if appropriate.
> See  for instructions.
> Makefile:1102: recipe for target 'tree-switch-conversion.o' failed
> make: *** [tree-switch-conversion.o] Error 1
>
> Please fix.

Using

  struct case_bit_test test[MAX_CASE_BIT_TESTS] = { {} };

avoids the ICE, the warning persists (not sure if correctly or not).
I'll commit this
workaround if it survives stage2/3 -Werror.

Richard.

> Richard.
>
>> Jason


Re: [patch] Do not directly change TARGET_MEM_REF in maybe_canonicalize_mem_ref_addr

2017-06-16 Thread Richard Biener
On Thu, Jun 15, 2017 at 3:51 PM, Eric Botcazou  wrote:
> Hi,
>
> the transformation done to TARGET_MEM_REF in maybe_canonicalize_mem_ref_addr
> is exactly the same as one of those done in maybe_fold_tmr, the latter is
> better written and the former function calls the latter, so this patch changes
> maybe_canonicalize_mem_ref_addr to avoid touching TARGET_MEM_REF directly.
>
> Tested on x86-64/Linux, OK for the mainline?

I don't think so.  get_address_description assumes TMR_BASE is in
canonical form,
that is, when it is an ADDR_EXPR we have a symbol and when not we have
a pointer.
TMR[&p->a] violates this and the gimple-fold.c part first canonicalizes this to
TMR[p + offsetof(a)].

Richard.

>
> 2017-06-15  Eric Botcazou  
>
> PR bootstrap/80897
> * gimple-fold.c (maybe_canonicalize_mem_ref_addr): Do not change
> TARGET_MEM_REF expressions directly.
>
> --
> Eric Botcazou


[PATCH] Fix missing returns in libstdc++ header and tests

2017-06-16 Thread Jonathan Wakely

Some missing returns found by ubsan.

* include/bits/locale_conv.h (wbuffer_convert::_M_put): Add missing
return statement.
* testsuite/21_strings/basic_string_view/operations/copy/char/1.cc:
Return void.
* testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc:
Likewise.
* testsuite/23_containers/map/modifiers/insert_or_assign/1.cc: Add
missing return statements.
* testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc:
Likewise.
* testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc:
Return void.
* testsuite/special_functions/14_expint/pr68397.cc: Likewise.

Tested powerpc64le-linux, committed to trunk.

commit 9e3a856a6ac1d10c4fdb92d99a6db2e9e9f13a44
Author: Jonathan Wakely 
Date:   Thu Jun 15 18:11:03 2017 +0100

Fix missing returns in libstdc++ header and tests

* include/bits/locale_conv.h (wbuffer_convert::_M_put): Add missing
return statement.
* testsuite/21_strings/basic_string_view/operations/copy/char/1.cc:
Return void.
* testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc:
Likewise.
* testsuite/23_containers/map/modifiers/insert_or_assign/1.cc: Add
missing return statements.
* testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc:
Likewise.
* testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc:
Return void.
* testsuite/special_functions/14_expint/pr68397.cc: Likewise.

diff --git a/libstdc++-v3/include/bits/locale_conv.h 
b/libstdc++-v3/include/bits/locale_conv.h
index 9b952d4..74beaec 100644
--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -482,6 +482,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
   {
if (_M_buf->sputn(__p, __n) < __n)
  return false;
+   return true;
   }
 
   // convert the put area and write to the byte stream buffer
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/char/1.cc 
b/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/char/1.cc
index 88a74f6..c2d1951 100644
--- 
a/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/char/1.cc
+++ 
b/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/char/1.cc
@@ -22,7 +22,7 @@
 #include 
 #include 
 
-bool
+void
 test01()
 {
   typedef std::string_view::size_type csize_type;
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc
 
b/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc
index 0e625d6..957c80c 100644
--- 
a/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc
+++ 
b/libstdc++-v3/testsuite/21_strings/basic_string_view/operations/copy/wchar_t/1.cc
@@ -22,7 +22,7 @@
 #include 
 #include 
 
-bool
+void
 test01()
 {
   typedef std::wstring_view::size_type csize_type;
diff --git 
a/libstdc++-v3/testsuite/23_containers/map/modifiers/insert_or_assign/1.cc 
b/libstdc++-v3/testsuite/23_containers/map/modifiers/insert_or_assign/1.cc
index 48c17eb..aaf5d8a 100644
--- a/libstdc++-v3/testsuite/23_containers/map/modifiers/insert_or_assign/1.cc
+++ b/libstdc++-v3/testsuite/23_containers/map/modifiers/insert_or_assign/1.cc
@@ -39,10 +39,12 @@ struct Val
   {
 val = other.val;
 other.moved_from_assign = true;
+return *this;
   }
   Val& operator=(const Val& other)
   {
 val = other.val;
+return *this;
   }
 };
 
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc
 
b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc
index 67fd164..d4b9601 100644
--- 
a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/insert_or_assign.cc
@@ -38,10 +38,12 @@ struct Val
   {
 val = other.val;
 other.moved_from_assign = true;
+return *this;
   }
   Val& operator=(const Val& other)
   {
 val = other.val;
+return *this;
   }
 };
 
diff --git 
a/libstdc++-v3/testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc 
b/libstdc++-v3/testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc
index 24cd2f5..7e3b773 100644
--- 
a/libstdc++-v3/testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc
+++ 
b/libstdc++-v3/testsuite/27_io/basic_istream/extractors_arithmetic/char/12.cc
@@ -50,7 +50,7 @@ void test12_aux(bool integer_type)
   VERIFY(is.fail());
 }
 
-bool test12()
+void test12()
 {
   test12_aux(true);
   test12_aux(true);
diff --git a/libstdc++-v3/testsuite/special_functions/14_expint/pr68397.cc 
b/libstdc++-v3/testsuite/special_functions/14_expint/pr68397.cc
index 8da16d1..9527f40 100644
--- a/libstdc++-v3/testsuite/special_functions/14_expint/pr68397.cc
+++ 

Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)

2017-06-16 Thread Richard Biener
On Fri, 16 Jun 2017, James Greenhalgh wrote:

> 
> On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > On Mon, 12 Jun 2017, James Greenhalgh wrote:
> >
> > >
> > > Hi,
> > >
> > > As subject, for the testcase in the patch:
> > >
> > >   unsigned long
> > >   f2 (unsigned long a, int b)
> > >   {
> > > unsigned long x = 1UL << b;
> > > return a / x;
> > >   }
> > >
> > > We currently generate:
> > >
> > >   f2:
> > >   mov x2, 1
> > >   lsl x1, x2, x1
> > >   udiv x0, x0, x1
> > >   ret
> > >
> > > Which could instead be transformed to:
> > >
> > >   f2:
> > >   lsr x0, x0, x1
> > >   ret
> > >
> > > OK?
> >
> > +   We can't do the same for signed A, as it might be negative, which
> > would
> > +   introduce undefined behaviour.  */
> >
> > huh, AFAIR it is _left_ shift of negative values that invokes
> > undefined behavior.
> 
> You're right this is not a clear comment. The problem is not undefined
> behaviour, so that text needs to go, but rounding towards/away from zero
> for signed negative values. Division will round towards zero, arithmetic
> right shift away from zero. For example in:
> 
> -1 / (1 << 1)   !=   -1 >> 1
>   = -1 / 2
>   = 0                  = -1
> 
> I've rewritten the comment to make it clear this is why we can only make
> this optimisation for unsigned values.

Ah, of course.  You could use

 if ((TYPE_UNSIGNED (type)
  || tree_expr_nonnegative_p (@0))

here as improvement.

> See, for example, gcc.c-torture/execute/pr34070-2.c
> 
> > Note that as you are accepting vectors you need to make sure the
> > target actually supports arithmetic right shift of vectors
> > (you only know it supports left shift and division -- so it might
> > be sort-of-superfluous to check in case there is no arch that supports
> > those but not the other).
> 
> I've added a check for that using optabs, is that the right way to do this?

+  && (!VECTOR_TYPE_P (type)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+  || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))

is not enough -- you need sth like

 optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
 if (ot != unknown_optab
 && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
   .. ok! ...

Ideally we'd have a helper for this in optabs-tree.[ch];
tree-vect-patterns.c could also make use of that.

Thanks,
Richard.


> Bootstrapped and tested on aarch64-none-linux-gnu with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-13  James Greenhalgh  
> 
>   * match.pd (A / (1 << B) -> A >> B): New.
>   * generic-match-head.c: Include optabs-tree.h.
>   * gimple-match-head.c: Likewise.
> 
> gcc/testsuite/
> 
> 2017-06-13  James Greenhalgh  
> 
>   * gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


RE: [PATCH][X86] Fix rounding pattern similar to PR73350

2017-06-16 Thread Koval, Julia
Hi,

This test hangs on avx512er, maybe that's why:
> According to POSIX, the behavior of a process is undefined after it ignores a 
> SIGFPE, SIGILL, or SIGSEGV signal that was not generated by kill(2) or 
> raise(3).

And volatile makes it work even without a patch (r1 and r2 are not combined then).

Added other changes.

Thanks,
Julia


> -Original Message-
> From: Uros Bizjak [mailto:ubiz...@gmail.com]
> Sent: Wednesday, June 14, 2017 11:54 AM
> To: Koval, Julia 
> Cc: Richard Biener ; Jakub Jelinek
> ; H.J. Lu ; GCC Patches  patc...@gcc.gnu.org>; Kirill Yukhin 
> Subject: Re: [PATCH][X86] Fix rounding pattern similar to PR73350
> 
> On Tue, Jun 13, 2017 at 1:37 PM, Koval, Julia  wrote:
> > Thank you for your help. I fixed the test similar to existing sigaction 
> > tests.
> >
> > gcc/
> > * config/i386/i386.c: Fix rounding expand for new pattern.
> > * config/i386/subst.md: Fix pattern (parallel -> unspec).
> > gcc/testsuite/
> > * gcc.target/i386/pr73350-2.c: New test.
> 
> The test will fail at runtime on non-avx512er targets. Can you please
> test the attached testcase?
> 
> Uros.


0001-fix.patch
Description: 0001-fix.patch


Re: [PATCH][PR 81089][PING] Fix -O3 bootstrap

2017-06-16 Thread Richard Biener
On Thu, 15 Jun 2017, Yuri Gribov wrote:

> On Thu, Jun 15, 2017 at 6:08 AM, Yuri Gribov  wrote:
> > Hi all,
> >
> > This patch fixes regression at bootstrap-O3.
> >
> > Local x64 bootstrap-O3 has completed w/o errors. I didn't run regtests
> > (I think it's not needed, given that new code can now run in fewer
> > cases than before).
> >
> > Ok to commit? (if yes - could someone commit for me?)
> 
> Sorry for early reminder but this fixes bootstrap-O3 so is probly urgent.

Ok.

I'll commit it for you.

Richard.


Re: [PATCH][X86] Fix rounding pattern similar to PR73350

2017-06-16 Thread Uros Bizjak
On Fri, Jun 16, 2017 at 8:46 AM, Koval, Julia  wrote:
> Hi,
>
> This test hangs on avx512er, maybe that's why:
>> According to POSIX, the behavior of a process is undefined after it ignores 
>> a SIGFPE, SIGILL, or SIGSEGV signal that was not generated by kill(2) or 
>> raise(3).
>
> And volatile make it work even without a patch(r1 and r2 are not combined 
> then).
>
> Added other changes.

The testcase LGTM. I'll leave the final approval to Kirill.

Uros.


Re: [PATCH GCC][10/13]Compute and cache data dependence relation

2017-06-16 Thread Richard Biener
On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng  wrote:
> Hi,
> This patch computes and caches data dependence relation in a hash table
> so that it can be queried multiple times later for partition dependence
> check.
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

+/* Vector of data dependence relations.  */
+static vec *ddrs_vec;
+
+/* Hash table for data dependence relation in the loop to be distributed.  */
+static hash_table *ddrs_table;

avoid the extra indirection.

+/* Hashtable entry for data reference relation.  */
+struct ddr_entry
+{
+  data_reference_p a;
+  data_reference_p b;
+  ddr_p ddr;
+  hashval_t hash;
+};
...
+/* Hash table equality function for data reference relation.  */
+
+inline bool
+ddr_entry_hasher::equal (const ddr_entry *entry1, const ddr_entry *entry2)
+{
+  return (entry1->hash == entry2->hash
+ && DR_STMT (entry1->a) == DR_STMT (entry2->a)
+ && DR_STMT (entry1->b) == DR_STMT (entry2->b)
+ && operand_equal_p (DR_REF (entry1->a), DR_REF (entry2->a), 0)
+ && operand_equal_p (DR_REF (entry1->b), DR_REF (entry2->b), 0));
+}

what's the issue with using hash_table  with a custom hasher?
That is, simply key on the dataref pointers (hash them, compare those
for equality)?

Your scheme looks too complicated / expensive to me ...

You can drop ddrs_vec needed only for memory removal if you traverse
the hashtable.

Richard.

> Thanks,
> bin
>
> 2017-06-07  Bin Cheng  
>
> * tree-loop-distribution.c (struct ddr_entry, ddr_entry_hasher): New.
> (ddr_entry_hasher::hash, ::equal, get_data_dependence): New function.
> (ddrs_vec, ddrs_table): New.
> (classify_partition): Call get_data_dependence.
> (pg_add_dependence_edges): Ditto.
> (distribute_loop): Initialize data dependence global variables.


Re: [PATCH GCC][12/13]Workaround reduction statements for distribution

2017-06-16 Thread Richard Biener
On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng  wrote:
> Hi,
> For now, loop distribution handles variables used outside of loop as 
> reduction.
> This is inaccurate because all partitions contain statement defining induction
> vars.

But final induction values are usually not used outside of the loop...

What is missing is loop distribution trying to change partition order.  In fact
we somehow assume we can move a reduction across a detected builtin
(I don't remember if we ever check for validity of that...).

> Ideally we should factor out scev-propagation as a standalone interface
> which can be called when necessary.  Before that, this patch simply 
> workarounds
> reduction issue by checking if the statement belongs to all partitions.  If 
> yes,
> the reduction must be computed in the last partition no matter how the loop is
> distributed.
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

stmt_in_all_partitions is not kept up-to-date during partition merging and if
merging makes the reduction partition(s) pass the stmt_in_all_partitions
test your simple workaround doesn't work ...

As written it's a valid optimization, but can you please note its limitation in
some comment?

Also...

+  bitmap_set_range (stmt_in_all_partitions, 0, rdg->n_vertices);
+  rdg_build_partitions (rdg, stmts, , stmt_in_all_partitions);

ick.  Please instead do

   bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
   for (i = 1; i < ...)
     bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);

Thanks,
Richard.

> Thanks,
> bin
> 2017-06-07  Bin Cheng  
>
> * tree-loop-distribution.c (classify_partition): New parameter and
> better handle reduction statement.
> (rdg_build_partitions): New parameter and record statements belonging
> to all partitions.
> (distribute_loop): Update use of above functions.


Re: [patch] Do not directly change TARGET_MEM_REF in maybe_canonicalize_mem_ref_addr

2017-06-16 Thread Richard Biener
On Fri, Jun 16, 2017 at 12:26 PM, Eric Botcazou  wrote:
>> I don't think so.  get_address_description assumes TMR_BASE is in
>> canonical form,
>> that is, when it is an ADDR_EXPR we have a symbol and when not we have
>> a pointer.
>> TMR[&p->a] violates this and the gimple-fold.c part first canonicalizes this
>> to TMR[p + offsetof(a)].
>
> get_address_description doesn't assume anything on TMR_BASE:
>
> void
> get_address_description (tree op, struct mem_address *addr)
> {
>   if (TREE_CODE (TMR_BASE (op)) == ADDR_EXPR)
> {
>   addr->symbol = TMR_BASE (op);
>   addr->base = TMR_INDEX2 (op);
> }
>   else
> {
>   addr->symbol = NULL_TREE;
>   if (TMR_INDEX2 (op))
> {
>   gcc_assert (integer_zerop (TMR_BASE (op)));
>   addr->base = TMR_INDEX2 (op);
> }
>   else
> addr->base = TMR_BASE (op);
> }
>   addr->index = TMR_INDEX (op);
>   addr->step = TMR_STEP (op);
>   addr->offset = TMR_OFFSET (op);
> }
>
> and maybe_fold_tmr will precisely turn TMR[&p->a] into TMR[p + offsetof(a)]:
>
>   if (addr.symbol
>   && TREE_CODE (TREE_OPERAND (addr.symbol, 0)) == MEM_REF)
> {
>   addr.offset = fold_binary_to_constant
> (PLUS_EXPR, TREE_TYPE (addr.offset),
>  addr.offset,
>  TREE_OPERAND (TREE_OPERAND (addr.symbol, 0), 1));
>   addr.symbol = TREE_OPERAND (TREE_OPERAND (addr.symbol, 0), 0);
>   changed = true;
> }
>   else if (addr.symbol
>&& handled_component_p (TREE_OPERAND (addr.symbol, 0)))
> {
>   HOST_WIDE_INT offset;
>   addr.symbol = build_fold_addr_expr
>   (get_addr_base_and_unit_offset
>  (TREE_OPERAND (addr.symbol, 0), ));
>   addr.offset = int_const_binop (PLUS_EXPR,
>  addr.offset, size_int (offset));
>   changed = true;
> }
>
> The transformations are exactly the same in maybe_canonicalize_mem_ref_addr.

Well, it seems this just compensates for the fact
get_address_description is confused
and says it has a symbol when it has not.

I'd rather leave the canonicalization in a single place for both
MEM_REF and TARGET_MEM_REF
and instead remove the above code from maybe_fold_tmr (which is only called from
maybe_canonicalize_mem_ref_addr, btw).  Inlining it (and thus exporting
create_mem_ref_raw)
would work for me as well and likely reduce the confusion as to what
is done where.

Richard.

> --
> Eric Botcazou


Re: RFC: [PATCH] Add warn_if_not_aligned attribute

2017-06-16 Thread H.J. Lu
On Thu, Jun 15, 2017 at 05:31:34PM +, Joseph Myers wrote:
> On Thu, 15 Jun 2017, H.J. Lu wrote:
> 
> > On Thu, Jun 15, 2017 at 8:38 AM, Martin Sebor  wrote:
> > >>
> > >> Where do we go from here?
> > >
> > >
> > > Other than the C and C++ maintainers needing to approve the patch
> > > I can't think of anything else.
> > 
> > Hi Joseph, Jason,
> > 
> > The complete patch is at
> > 
> > https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00541.html
> > 
> > Is this OK for trunk?
> 
> I'd expect the warning calls to include OPT_Wif_not_aligned or 
> OPT_Wpacked_not_aligned (as appropriate, depending on what triggered the 
> warning / would disable it), so the warning output includes an option 
> name.
> 

Done.

> As the attribute is specific to fields I'd expect testcases that use of it 

Done.

> on non-fields is diagnosed.  And I think the diagnostic for that should 
> include quotes, %<warn_if_not_aligned%>.
> 

Done.

Here is the updated patch.  OK for trunk?

Thanks.


H.J.
---
Add the warn_if_not_aligned attribute as well as the command line options
-Wif-not-aligned and -Wpacked-not-aligned.

__attribute__((warn_if_not_aligned(N))) causes compiler to issue a
warning if the field in a struct or union is not aligned to N:

typedef unsigned long long __u64
  __attribute__((aligned(4),warn_if_not_aligned(8)));

struct foo
{
  int i1;
  int i2;
  __u64 x;
};

__u64 is aligned to 4 bytes.  But inside struct foo, __u64 should be
aligned at 8 bytes.  It is used to define struct foo in such a way that
struct foo has the same layout and x has the same alignment when __u64
is aligned at either 4 or 8 bytes.

Since struct foo is normally aligned to 4 bytes, a warning will be issued:

warning: alignment 4 of 'struct foo' is less than 8

Align struct foo to 8 bytes:

struct foo
{
  int i1;
  int i2;
  __u64 x;
} __attribute__((aligned(8)));

silences the warning.  It also warns the field with misaligned offset:

struct foo
{
  int i1;
  int i2;
  int i3;
  __u64 x;
} __attribute__((aligned(8)));

warning: 'x' offset 12 in 'struct foo' isn't aligned to 8

This warning is controlled by -Wif-not-aligned and is enabled by default.

When -Wpacked-not-aligned is used, the same warning is also issued for
the field with explicitly specified alignment in a packed struct or union:

struct __attribute__ ((aligned (8))) S8 { char a[8]; };
struct __attribute__ ((packed)) S {
  struct S8 s8;
};

warning: alignment 1 of 'struct S' is less than 8

This warning is disabled by default and enabled by -Wall.

gcc/

PR c/53037
* print-tree.c (print_node): Support DECL_WARN_IF_NOT_ALIGN
and TYPE_WARN_IF_NOT_ALIGN.
* stor-layout.c (do_type_align): Merge DECL_WARN_IF_NOT_ALIGN.
(handle_warn_if_not_align): New.
(place_union_field): Call handle_warn_if_not_align.
(place_field): Call handle_warn_if_not_align.  Copy
TYPE_WARN_IF_NOT_ALIGN.
(finish_builtin_struct): Copy TYPE_WARN_IF_NOT_ALIGN.
(layout_type): Likewise.
* tree-core.h (tree_type_common): Add warn_if_not_align.  Set
spare to 18.
(tree_decl_common): Add warn_if_not_align.
* tree.c (build_range_type_1): Copy TYPE_WARN_IF_NOT_ALIGN.
* tree.h (TYPE_WARN_IF_NOT_ALIGN): New.
(SET_TYPE_WARN_IF_NOT_ALIGN): Likewise.
(DECL_WARN_IF_NOT_ALIGN): Likewise.
(SET_DECL_WARN_IF_NOT_ALIGN): Likewise.
* doc/extend.texi: Document warn_if_not_aligned attribute.
* doc/invoke.texi: Document -Wif-not-aligned and
-Wpacked-not-aligned.

gcc/c-family/

PR c/53037
* c-attribs.c (handle_warn_if_not_aligned_attribute): New.
(c_common_attribute_table): Add warn_if_not_aligned.
(handle_aligned_attribute): Renamed to ...
(common_handle_aligned_attribute): Remove argument, name, and add
argument, warn_if_not_aligned.  Handle warn_if_not_aligned.
(handle_aligned_attribute): New.
* c.opt: Add -Wif-not-aligned and -Wpacked-not-aligned.

gcc/c/

PR c/53037
* c-decl.c (merge_decls): Also merge DECL_WARN_IF_NOT_ALIGN.

gcc/cp/

PR c/53037
* decl.c (duplicate_decls): Also merge DECL_WARN_IF_NOT_ALIGN.

gcc/testsuite/

PR c/53037
* c-c++-common/pr53037-5.c: New test.
* g++.dg/pr53037-1.C: Likewise.
* g++.dg/pr53037-2.C: Likewise.
* g++.dg/pr53037-3.C: Likewise.
* g++.dg/pr53037-4.C: Likewise.
* gcc.dg/pr53037-1.c: Likewise.
* gcc.dg/pr53037-2.c: Likewise.
* gcc.dg/pr53037-3.c: Likewise.
* gcc.dg/pr53037-4.c: Likewise.
---
 gcc/c-family/c-attribs.c   | 68 ++
 gcc/c-family/c.opt |  8 
 gcc/c/c-decl.c |  4 ++
 gcc/cp/decl.c  |  4 ++
 gcc/doc/extend.texi| 87 ++
 gcc/doc/invoke.texi| 29 +++-
 

Re: [PATCH GCC][11/13]Annotate partition by its parallelism execution type

2017-06-16 Thread Richard Biener
On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng  wrote:
> Hi,
> This patch checks and records if partition can be executed in parallel by
> looking if there exists data dependence cycles.  The information is needed
> for distribution because the idea is to distribute parallel type partitions
> away from sequential ones.  I believe current distribution doesn't work
> very well because it does blind distribution/fusion.
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

+  /* In case of no data dependence.  */
+  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+return false;
+  /* Or the data dependence can be resolved by compilation time alias
+ check.  */
+  else if (!alias_sets_conflict_p (get_alias_set (DR_REF (dr1)),
+  get_alias_set (DR_REF (dr2
+return false;

dependence analysis should use TBAA already, in which cases do you need this?
It seems to fall foul of the easy mistake of not honoring GCCs memory model
as well ... see dr_may_alias_p.

+  /* Further check if any data dependence prevents us from executing the
+ partition parallelly.  */
+  EXECUTE_IF_SET_IN_BITMAP (partition->reads, 0, i, bi)
+{
+  dr1 = (*datarefs_vec)[i];
+  EXECUTE_IF_SET_IN_BITMAP (partition->writes, 0, j, bj)
+   {

what about write-write dependences?

+  EXECUTE_IF_SET_IN_BITMAP (partition->reads, 0, i, bi)
+{
+  dr1 = (*datarefs_vec)[i];
+  EXECUTE_IF_SET_IN_BITMAP (partition->writes, i + 1, j, bj)
+   {
+ dr2 = (*datarefs_vec)[j];
+ /* Partition can only be executed sequentially if there is any
+data dependence cycle.  */

exact copy of the loop nest follows?!  Maybe you meant to iterate
over writes in the first loop.

Richard.


> Thanks,
> bin
> 2017-06-07  Bin Cheng  
>
> * tree-loop-distribution.c (alias.h): Include header file.
> (enum partition_type): New.
> (struct partition): New field type.
> (partition_merge_into): Update partition type.
> (data_dep_in_cycle_p): New function.
> (build_rdg_partition_for_vertex): Compute partition type.
> (rdg_build_partitions): Dump partition type.


Make profile update in inlining more robust

2017-06-16 Thread Jan Hubicka
Hi,
when inlining last copy of function, we assume that the counts are already 
correct,
because all other execution paths have been inlined earlier.  While this is true 
in
simple cases of static functions, it can be wrong for comdats where we optimize
out the last copy but during training run we may have collected execution counts
from other units.  This could then result in quite large profile inconsistencies
and thus it is better to fix.

Of course this is not perfect if some offline copy of function is later 
produced,
because that one will have zero counts.  I do not think there is easy solution
for this problem except for LTO or possibly arranging profile counts of 
different
copies of same comdat to be shared which we don't have infrastructure for.

Bootstrapped/regtested x86_64-linux,
committed.

Honza

* ipa-inline-transform.c (update_noncloned_frequencies): Update also
counts.
(clone_inlined_nodes): Update.
Index: ipa-inline-transform.c
===
--- ipa-inline-transform.c  (revision 249227)
+++ ipa-inline-transform.c  (working copy)
@@ -54,10 +54,12 @@ int nfunctions_inlined;
 /* Scale frequency of NODE edges by FREQ_SCALE.  */
 
 static void
-update_noncloned_frequencies (struct cgraph_node *node,
- int freq_scale)
+update_noncloned_frequencies (struct cgraph_node *node, 
+ int freq_scale, profile_count num,
+ profile_count den)
 {
   struct cgraph_edge *e;
+  bool scale = (num == profile_count::zero () || den > 0);
 
   /* We do not want to ignore high loop nest after freq drops to 0.  */
   if (!freq_scale)
@@ -68,14 +70,20 @@ update_noncloned_frequencies (struct cgr
   if (e->frequency > CGRAPH_FREQ_MAX)
 e->frequency = CGRAPH_FREQ_MAX;
   if (!e->inline_failed)
-update_noncloned_frequencies (e->callee, freq_scale);
+update_noncloned_frequencies (e->callee, freq_scale, num, den);
+  if (scale)
+   e->count = e->count.apply_scale (num, den);
 }
   for (e = node->indirect_calls; e; e = e->next_callee)
 {
   e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
   if (e->frequency > CGRAPH_FREQ_MAX)
 e->frequency = CGRAPH_FREQ_MAX;
+  if (scale)
+   e->count = e->count.apply_scale (num, den);
 }
+  if (scale)
+node->count = node->count.apply_scale (num, den);
 }
 
 /* We removed or are going to remove the last call to NODE.
@@ -212,7 +220,8 @@ clone_inlined_nodes (struct cgraph_edge
}
  duplicate = false;
  e->callee->externally_visible = false;
-  update_noncloned_frequencies (e->callee, e->frequency);
+  update_noncloned_frequencies (e->callee, e->frequency,
+   e->count, e->callee->count);
 
  dump_callgraph_transformation (e->callee, inlining_into,
 "inlining to");


Re: [Patch, Fortran, F03] PR 80983: memory leak when calling procedure-pointer component with allocatable result

2017-06-16 Thread Janus Weil
2017-06-16 11:11 GMT+02:00 Christophe Lyon :
> Hi,
>
> On 15 June 2017 at 23:18, Janus Weil  wrote:
>> 2017-06-15 15:33 GMT+02:00 Thomas Koenig :
>>> Hi Janus,
>>>
 It regtests cleanly on x86_64-linux-gnu. Ok for trunk?
>>>
>>>
>>> OK.
>>>
>>> Thanks for the patch!
>>
>> Thanks, Thomas! Committed as r249227.
>>
>
> There's a problem with the new test. It says:
> gfortran.dg/proc_ptr_comp_51.f90   -O  : dump file does not exist
> UNRESOLVED: gfortran.dg/proc_ptr_comp_51.f90   -O
> scan-tree-dump-times original "__builtin_free" 3
>
> You probably either want to add:
> ! { dg-options "-fdump-tree-original" }
> or to remove:
> ! { dg-final { scan-tree-dump-times "__builtin_free" 3 "original" } }

Thanks for your attention, Christophe. I wanted the first of these two
options, but apparently forgot that line ... :(

Fixed with r249243.

Cheers,
Janus


Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Richard Biener
On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng  wrote:
> Hi,
> Loop split forces intermediate computation to gimple operands all the time 
> when
> computing bound information.  This is not good since folding opportunities are
> missed.  This patch fixes the issue by feeding all computation to folder and 
> only
> forcing to gimple operand at last.
>
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

Hm?  It uses gimple_build () which should do the same as fold_buildN in terms
of simplification.

So where does that not work?  It is supposed to be the preferred way and no
new code should use force_gimple_operand (unless dealing with generic
coming from other middle-end infrastructure like SCEV or niter analysis)

Richard.

>
> Thanks,
> bin
> 2017-06-12  Bin Cheng  
>
> * tree-ssa-loop-split.c (compute_new_first_bound): Feed bound
> computation to folder, rather than force to gimple operands too
> early.


Re: [Patch ARM] Fix PR71778

2017-06-16 Thread Kyrill Tkachov


On 16/06/17 10:07, James Greenhalgh wrote:

On Wed, Jun 14, 2017 at 11:21:30AM +0100, Kyrill Tkachov wrote:

   <...>


That movv2di expander is the one in vec-common.md that ends up calling
neon_make_constant. I wonder why const0_rtx passed its predicate check
(that would require a V2DImode vector of zeroes rather than a const0_rtx).
Perhaps the midend code at this point doesn't check the operand predicate.

In the builtin expansion code that you quoted I wonder if we could fail
more gracefully by returning CONST0_RTX (mode[argc]) to match the expected
mode of the operand (we've already emitted an error, so we shouldn't care
what RTL we emit as long as it doesn't cause an ICE).

   <...>


diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e503891..b8d59c6 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12124,6 +12124,11 @@ neon_make_constant (rtx vals)
if (n_const == n_elts)
const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
  }
+  else if (vals == const0_rtx)
+/* Something invalid, perhaps from expanding an intrinsic
+   which requires a constant argument, where a variable argument
+   was passed.  */
+ return const0_rtx;
else
  gcc_unreachable ();

I'm not a fan of this as the function has a precondition that its argument is
a PARALLEL or a CONST_VECTOR and special-casing const0_rtx breaks that. I'd
rather we tried fixing this closer to the error source.  Can you try the
suggestion above instead please?

Your suggestion doesn't quite work, but this is pretty close to it. Rather
than try to guess at the correct mode for CONST0_RTX (we can't just use
mode[argc] as that will get you the scalar mode), we can just return target
directly. That will ensure we've given something valid back in the correct
mode, even if it is not all that useful.


Yeah, that actually looks better.


Bootstrapped on arm-none-linux-gnueabihf. OK?


Ok.
Thanks,
Kyrill



Thanks,
James

---
gcc/

2017-06-15  James Greenhalgh  

PR target/71778
* config/arm/arm-builtins.c (arm_expand_builtin_args): Return TARGET
if given a non-constant argument for an intrinsic which requires a
constant.

gcc/testsuite/

2017-06-15  James Greenhalgh  

PR target/71778
* gcc.target/arm/pr71778.c: New.





Re: [patch] Do not directly change TARGET_MEM_REF in maybe_canonicalize_mem_ref_addr

2017-06-16 Thread Eric Botcazou
> I don't think so.  get_address_description assumes TMR_BASE is in
> canonical form,
> that is, when it is an ADDR_EXPR we have a symbol and when not we have
> a pointer.
> TMR[&p->a] violates this and the gimple-fold.c part first canonicalizes this
> to TMR[p + offsetof(a)].

get_address_description doesn't assume anything on TMR_BASE:

void
get_address_description (tree op, struct mem_address *addr)
{
  if (TREE_CODE (TMR_BASE (op)) == ADDR_EXPR)
{
  addr->symbol = TMR_BASE (op);
  addr->base = TMR_INDEX2 (op);
}
  else
{
  addr->symbol = NULL_TREE;
  if (TMR_INDEX2 (op))
{
  gcc_assert (integer_zerop (TMR_BASE (op)));
  addr->base = TMR_INDEX2 (op);
}
  else
addr->base = TMR_BASE (op);
}
  addr->index = TMR_INDEX (op);
  addr->step = TMR_STEP (op);
  addr->offset = TMR_OFFSET (op);
}

and maybe_fold_tmr will precisely turn TMR[&p->a] into TMR[p + offsetof(a)]:

  if (addr.symbol
  && TREE_CODE (TREE_OPERAND (addr.symbol, 0)) == MEM_REF)
{
  addr.offset = fold_binary_to_constant
(PLUS_EXPR, TREE_TYPE (addr.offset),
 addr.offset,
 TREE_OPERAND (TREE_OPERAND (addr.symbol, 0), 1));
  addr.symbol = TREE_OPERAND (TREE_OPERAND (addr.symbol, 0), 0);
  changed = true;
}
  else if (addr.symbol
   && handled_component_p (TREE_OPERAND (addr.symbol, 0)))
{
  HOST_WIDE_INT offset;
  addr.symbol = build_fold_addr_expr
  (get_addr_base_and_unit_offset
         (TREE_OPERAND (addr.symbol, 0), &offset));
  addr.offset = int_const_binop (PLUS_EXPR,
 addr.offset, size_int (offset));
  changed = true;
}

The transformations are exactly the same in maybe_canonicalize_mem_ref_addr.

-- 
Eric Botcazou


Re: [PATCH] Enhance dump_probability function.

2017-06-16 Thread Jan Hubicka
> Hi.
> 
> This is pre-approved patch that displays edge counts in dump files:
> 
> ...
>   _85 = _83 + _84;
>   len_86 = SQRT (_85);
>   if (_85 u>= 0.0)
> goto ; [99.00%] [count: 778568]
>   else
> goto ; [1.00%] [count: 7864]
> 
>[0.01%] [count: 7864]:
>   sqrt (_85);
> ...
> 
> That makes it possible to understand why a profile mismatch happens.
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> 
> Martin
> 
> gcc/ChangeLog:
> 
> 2017-06-12  Martin Liska  
> 
>   * gimple-pretty-print.c (dump_probability): Add new argument.
>   (dump_edge_probability): Dump both probability and count.
>   (dump_gimple_label): Likewise.
>   (dump_gimple_bb_header): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2017-06-12  Martin Liska  
> 
>   * gcc.dg/tree-ssa/builtin-sprintf-2.c: Adjust scanned pattern.
>   * gcc.dg/tree-ssa/dump-2.c: Likewise.
>   * gcc.dg/tree-ssa/vrp101.c: Likewise.

OK 
Honza
> ---
>  gcc/gimple-pretty-print.c | 22 ++
>  gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c |  4 ++--
>  gcc/testsuite/gcc.dg/tree-ssa/dump-2.c|  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/vrp101.c|  2 +-
>  4 files changed, 18 insertions(+), 12 deletions(-)
> 
> 

> diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
> index 5ff63a167a1..447921be036 100644
> --- a/gcc/gimple-pretty-print.c
> +++ b/gcc/gimple-pretty-print.c
> @@ -80,17 +80,22 @@ debug_gimple_stmt (gimple *gs)
> by xstrdup_for_dump.  */
>  
>  static const char *
> -dump_probability (int value)
> +dump_probability (int frequency, profile_count )
>  {
>float minimum = 0.01f;
>  
> -  gcc_assert (0 <= value && value <= REG_BR_PROB_BASE);
> -  float fvalue = value * 100.0f / REG_BR_PROB_BASE;
> -  if (fvalue < minimum && value > 0)
> +  gcc_assert (0 <= frequency && frequency <= REG_BR_PROB_BASE);
> +  float fvalue = frequency * 100.0f / REG_BR_PROB_BASE;
> +  if (fvalue < minimum && frequency > 0)
>  return "[0.01%]";
>  
>char *buf;
> -  asprintf (, "[%.2f%%]", fvalue);
> +  if (count.initialized_p ())
> +asprintf (, "[%.2f%%] [count: %" PRId64 "]", fvalue,
> +   count.to_gcov_type ());
> +  else
> +asprintf (, "[%.2f%%] [count: INV]", fvalue);
> +
>const char *ret = xstrdup_for_dump (buf);
>free (buf);
>  
> @@ -102,7 +107,7 @@ dump_probability (int value)
>  static void
>  dump_edge_probability (pretty_printer *buffer, edge e)
>  {
> -  pp_scalar (buffer, " %s", dump_probability (e->probability));
> +  pp_scalar (buffer, " %s", dump_probability (e->probability, e->count));
>  }
>  
>  /* Print GIMPLE statement G to FILE using SPC indentation spaces and
> @@ -1085,7 +1090,7 @@ dump_gimple_label (pretty_printer *buffer, glabel *gs, 
> int spc,
>dump_generic_node (buffer, label, spc, flags, false);
>basic_block bb = gimple_bb (gs);
>if (bb && !(flags & TDF_GIMPLE))
> - pp_scalar (buffer, " %s", dump_probability (bb->frequency));
> + pp_scalar (buffer, " %s", dump_probability (bb->frequency, bb->count));
>pp_colon (buffer);
>  }
>if (flags & TDF_GIMPLE)
> @@ -2665,7 +2670,8 @@ dump_gimple_bb_header (FILE *outf, basic_block bb, int 
> indent,
>   fprintf (outf, "%*sbb_%d:\n", indent, "", bb->index);
> else
>   fprintf (outf, "%*s %s:\n",
> -  indent, "", bb->index, dump_probability (bb->frequency));
> +  indent, "", bb->index, dump_probability (bb->frequency,
> +   bb->count));
>   }
>  }
>  }
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c
> index 8a13f33d2a1..e15d88b7341 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-2.c
> @@ -290,7 +290,7 @@ RNG (0,  6,   8, "%s%ls", "1", L"2");
>  
>  /*  Only conditional calls to must_not_eliminate must be made (with
>  any probability):
> -{ dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\]:\n 
> *must_not_eliminate" 127 "optimized" { target { ilp32 || lp64 } } } }
> -{ dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\]:\n 
> *must_not_eliminate" 96 "optimized" { target { { ! ilp32 } && { ! lp64 } } } 
> } }
> +{ dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\]\\ 
> \\\[count:\\[^:\\]*\\\]:\n *must_not_eliminate" 127 "optimized" { target { 
> ilp32 || lp64 } } } }
> +{ dg-final { scan-tree-dump-times "> \\\[\[0-9.\]+%\\\]\\ 
> \\\[count:\\[^:\\]*\\\]:\n *must_not_eliminate" 96 "optimized" { target { { ! 
> ilp32 } && { ! lp64 } } } } }
>  No unconditional calls to abort should be made:
>  { dg-final { scan-tree-dump-not ";\n *must_not_eliminate" "optimized" } 
> } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dump-2.c 
> 

Re: [PATCH] Fix PR81090, properly free niter estimates

2017-06-16 Thread Richard Biener
On Wed, 14 Jun 2017, Richard Biener wrote:

> 
> niter estimates are not kept up-to-date (they reference gimple stmts
> and trees) in the keep-loop-stuff infrastructure so similar to the
> SCEV cache we rely on people freeing it after passes.
> 
> The following brings us a step closer to that by freeing them whenever
> SCEV is invalidated (we only compute them when SCEV is active) plus
> removing the odd record-bounds pass that just computes them, leaving
> scavenging to following passes.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Some awkward interactions with peeling means I'm installing the
following less aggressive variant.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-06-16  Richard Biener  

PR tree-optimization/81090
* passes.def (pass_record_bounds): Remove.
* tree-pass.h (make_pass_record_bounds): Likewise.
* tree-ssa-loop.c (pass_data_record_bounds, pass_record_bounds,
make_pass_record_bounds): Likewise.
* tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Do
not free niter estimates at the beginning but at the end.
* tree-scalar-evolution.c (scev_finalize): Free niter estimates.

* gcc.dg/graphite/pr81090.c: New testcase.

Index: gcc/passes.def
===
--- gcc/passes.def  (revision 249246)
+++ gcc/passes.def  (working copy)
@@ -276,7 +276,6 @@ along with GCC; see the file COPYING3.
  /* All unswitching, final value replacement and splitting can expose
 empty loops.  Remove them now.  */
  NEXT_PASS (pass_cd_dce);
- NEXT_PASS (pass_record_bounds);
  NEXT_PASS (pass_iv_canon);
  NEXT_PASS (pass_loop_distribution);
  NEXT_PASS (pass_copy_prop);
Index: gcc/testsuite/gcc.dg/graphite/pr81090.c
===
--- gcc/testsuite/gcc.dg/graphite/pr81090.c (nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr81090.c (working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -floop-nest-optimize" } */
+
+int x3, za;
+int hg[1];
+
+void
+yw (int dq)
+{
+  const int r7 = 2;
+
+  while (dq < 1)
+{
+  for (x3 = 0; x3 < r7; ++x3)
+   for (za = 0; za < r7; ++za)
+ hg[1] = 0;
+  ++dq;
+}
+
+  x3 = 0;
+  while (x3 < r7)
+{
+  ++x3;
+  if (x3 == 0)
+   break;
+}
+}
Index: gcc/tree-pass.h
===
--- gcc/tree-pass.h (revision 249246)
+++ gcc/tree-pass.h (working copy)
@@ -373,7 +373,6 @@ extern gimple_opt_pass *make_pass_predco
 extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_empty_loop (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_record_bounds (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_graphite_transforms (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt);
Index: gcc/tree-scalar-evolution.c
===
--- gcc/tree-scalar-evolution.c (revision 249246)
+++ gcc/tree-scalar-evolution.c (working copy)
@@ -3636,6 +3636,7 @@ scev_finalize (void)
 return;
   scalar_evolution_info->empty ();
   scalar_evolution_info = NULL;
+  free_numbers_of_iterations_estimates (cfun);
 }
 
 /* Returns true if the expression EXPR is considered to be too expensive
Index: gcc/tree-ssa-loop-ivcanon.c
===
--- gcc/tree-ssa-loop-ivcanon.c (revision 249246)
+++ gcc/tree-ssa-loop-ivcanon.c (working copy)
@@ -1212,7 +1212,6 @@ canonicalize_induction_variables (void)
   bool irred_invalidated = false;
   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
 
-  free_numbers_of_iterations_estimates (cfun);
   estimate_numbers_of_iterations ();
 
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
@@ -1230,6 +1229,7 @@ canonicalize_induction_variables (void)
 
   /* Clean up the information about numbers of iterations, since brute force
  evaluation could reveal new information.  */
+  free_numbers_of_iterations_estimates (cfun);
   scev_reset ();
 
   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
Index: gcc/tree-ssa-loop.c
===
--- gcc/tree-ssa-loop.c (revision 249246)
+++ gcc/tree-ssa-loop.c (working copy)
@@ -459,54 +459,6 @@ make_pass_scev_cprop (gcc::context *ctxt
   return new pass_scev_cprop (ctxt);
 }
 
-/* Record bounds on numbers of iterations of loops.  */
-
-namespace {
-
-const pass_data pass_data_record_bounds =
-{
-  GIMPLE_PASS, /* type */
-  "*record_bounds", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  

[PATCH] Add std::get_deleter overload with correct signature

2017-06-16 Thread Jonathan Wakely

This was found by a third-party testsuite, which complained because we
only define get_deleter(const __shared_ptr&) and not the correct
signature.

* include/bits/shared_ptr.h (get_deleter): Add overload matching
standard signature.
* include/bits/shared_ptr_base.h (__shared_ptr): Declare new
get_deleter overload as a friend.
* testsuite/20_util/shared_ptr/misc/get_deleter.cc: New.

Tested powerpc64le-linux, committed to trunk.


commit 19e9cbdc902495edd290c749cc76976a9bdec12d
Author: Jonathan Wakely 
Date:   Fri Apr 21 14:26:43 2017 +0100

Add std::get_deleter overload with correct signature

* include/bits/shared_ptr.h (get_deleter): Add overload matching
standard signature.
* include/bits/shared_ptr_base.h (__shared_ptr): Declare new
get_deleter overload as a friend.
* testsuite/20_util/shared_ptr/misc/get_deleter.cc: New.

diff --git a/libstdc++-v3/include/bits/shared_ptr.h 
b/libstdc++-v3/include/bits/shared_ptr.h
index 264e35c..999a034 100644
--- a/libstdc++-v3/include/bits/shared_ptr.h
+++ b/libstdc++-v3/include/bits/shared_ptr.h
@@ -70,7 +70,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return __os;
 }
 
-  /// 20.7.2.2.10 shared_ptr get_deleter
   template
 inline _Del*
 get_deleter(const __shared_ptr<_Tp, _Lp>& __p) noexcept
@@ -82,6 +81,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
 }
 
+  /// 20.7.2.2.10 shared_ptr get_deleter
+  template
+inline _Del*
+get_deleter(const shared_ptr<_Tp>& __p) noexcept
+{
+#if __cpp_rtti
+  return static_cast<_Del*>(__p._M_get_deleter(typeid(_Del)));
+#else
+  return 0;
+#endif
+}
 
   /**
*  @brief  A smart pointer with reference-counted copy semantics.
diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index a07058c..7e6766b 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -1402,6 +1402,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
friend _Del* get_deleter(const __shared_ptr<_Tp1, _Lp1>&) noexcept;
 
+  template
+   friend _Del* get_deleter(const shared_ptr<_Tp1>&) noexcept;
+
   element_type*   _M_ptr; // Contained pointer.
   __shared_count<_Lp>  _M_refcount;// Reference counter.
 };
diff --git a/libstdc++-v3/testsuite/20_util/shared_ptr/misc/get_deleter.cc 
b/libstdc++-v3/testsuite/20_util/shared_ptr/misc/get_deleter.cc
new file mode 100644
index 000..cb5f4c7
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/shared_ptr/misc/get_deleter.cc
@@ -0,0 +1,44 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do run { target c++11 } }
+
+#include 
+#include 
+
+struct Del {
+  template void operator()(T* p) const noexcept { delete p; }
+};
+
+Del* (*f1)(const std::shared_ptr&) = std::get_deleter;
+
+void
+test01()
+{
+  std::shared_ptr p;
+  VERIFY( std::get_deleter(p) == nullptr );
+  p = std::shared_ptr(new int, Del());
+  VERIFY( std::get_deleter(p) != nullptr );
+  p = std::shared_ptr(new int);
+  VERIFY( std::get_deleter(p) == nullptr );
+}
+
+int
+main()
+{
+  test01();
+}


Re: [PATCH] PR libstdc++/81092 add std::wstring symbols and bump library version

2017-06-16 Thread Jakub Jelinek
On Wed, Jun 14, 2017 at 07:14:29PM +0100, Jonathan Wakely wrote:
> On 14/06/17 19:13 +0100, Jonathan Wakely wrote:
> > There are two symbols defined in GCC 7.1's libstdc++.6.0.23 library
> > which are not exported on all targets (because I wrote "m" in the
> > linker script instead of "[jmy]").
> > 
> > This patch bumps the library version on gcc-7-branch to 6.0.24 and
> > exports the "[jy]" versions of the symbols with version the new
> > GLIBCXX_3.4.24 symbol version.
> > 
> > This requires bumping the version on trunk to 6.0.25 and moving the
> > new random_device::_M_get_entropy() symbol to GLIBCXX_3.4.25 (which
> > will be done by the patch in the following mail).
> 
> Here's the patch for trunk.
> 
> Target maintainers will need to regenerate the baseline symbols on
> gcc-7-branch and trunk.
> 
> I intend to commit this tomorrow to trunk.
> 
> 
> 

> commit 9354adee0dccc3ebc2aff07c3bdac3eb8ccdbda8
> Author: Jonathan Wakely 
> Date:   Wed Jun 14 14:06:40 2017 +0100
> 
> PR libstdc++/81092 add std::wstring symbols and bump library version
> 
>   PR libstdc++/81092
>   * acinclude.m4: Bump libtool_VERSION.
>   * config/abi/post/i386-linux-gnu/baseline_symbols.txt: Update.
>   * config/abi/post/x86_64-linux-gnu/32/baseline_symbols.txt: Update.

Here is a corresponding patch for i486 and powerpc 32-bit, ok for trunk?

2017-06-16  Jakub Jelinek  

PR libstdc++/81092
* config/abi/post/i486-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/powerpc-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/powerpc64-linux-gnu/32/baseline_symbols.txt: Update.

--- libstdc++-v3/config/abi/post/i486-linux-gnu/baseline_symbols.txt.jj 
2017-02-13 18:08:44.0 +0100
+++ libstdc++-v3/config/abi/post/i486-linux-gnu/baseline_symbols.txt
2017-06-16 14:15:32.431090847 +0200
@@ -444,6 +444,7 @@ FUNC:_ZNKSt13basic_fstreamIwSt11char_tra
 FUNC:_ZNKSt13basic_istreamIwSt11char_traitsIwEE6gcountEv@@GLIBCXX_3.4
 FUNC:_ZNKSt13basic_istreamIwSt11char_traitsIwEE6sentrycvbEv@@GLIBCXX_3.4
 FUNC:_ZNKSt13basic_ostreamIwSt11char_traitsIwEE6sentrycvbEv@@GLIBCXX_3.4
+FUNC:_ZNKSt13random_device13_M_getentropyEv@@GLIBCXX_3.4.25
 FUNC:_ZNKSt13runtime_error4whatEv@@GLIBCXX_3.4
 FUNC:_ZNKSt14basic_ifstreamIcSt11char_traitsIcEE5rdbufEv@@GLIBCXX_3.4
 FUNC:_ZNKSt14basic_ifstreamIcSt11char_traitsIcEE7is_openEv@@GLIBCXX_3.4.5
@@ -1329,6 +1330,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EP
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EPKwjRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jRKS1_@@GLIBCXX_3.4.24
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jjRKS1_@@GLIBCXX_3.4
 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ESt16initializer_listIwERKS1_@@GLIBCXX_3.4.11
@@ -1342,6 +1344,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2EP
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2EPKwjRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jRKS1_@@GLIBCXX_3.4.24
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jjRKS1_@@GLIBCXX_3.4
 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ESt16initializer_listIwERKS1_@@GLIBCXX_3.4.11
@@ -4002,6 +4005,8 @@ OBJECT:0:GLIBCXX_3.4.20
 OBJECT:0:GLIBCXX_3.4.21
 OBJECT:0:GLIBCXX_3.4.22
 OBJECT:0:GLIBCXX_3.4.23
+OBJECT:0:GLIBCXX_3.4.24
+OBJECT:0:GLIBCXX_3.4.25
 OBJECT:0:GLIBCXX_3.4.3
 OBJECT:0:GLIBCXX_3.4.4
 OBJECT:0:GLIBCXX_3.4.5
--- libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt.jj  
2017-06-07 08:39:57.0 +0200
+++ libstdc++-v3/config/abi/post/powerpc-linux-gnu/baseline_symbols.txt 
2017-06-16 16:04:35.591285123 +0200
@@ -444,7 +444,7 @@ FUNC:_ZNKSt13basic_fstreamIwSt11char_tra
 FUNC:_ZNKSt13basic_istreamIwSt11char_traitsIwEE6gcountEv@@GLIBCXX_3.4
 FUNC:_ZNKSt13basic_istreamIwSt11char_traitsIwEE6sentrycvbEv@@GLIBCXX_3.4
 FUNC:_ZNKSt13basic_ostreamIwSt11char_traitsIwEE6sentrycvbEv@@GLIBCXX_3.4
-FUNC:_ZNKSt13random_device13_M_getentropyEv@@GLIBCXX_3.4.24
+FUNC:_ZNKSt13random_device13_M_getentropyEv@@GLIBCXX_3.4.25
 FUNC:_ZNKSt13runtime_error4whatEv@@GLIBCXX_3.4
 FUNC:_ZNKSt14basic_ifstreamIcSt11char_traitsIcEE5rdbufEv@@GLIBCXX_3.4
 FUNC:_ZNKSt14basic_ifstreamIcSt11char_traitsIcEE7is_openEv@@GLIBCXX_3.4.5
@@ -1472,6 +1472,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EP
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EPKwjRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jRKS1_@@GLIBCXX_3.4.24
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jjRKS1_@@GLIBCXX_3.4
 

Re: [PATCH v3][AArch64] Fix symbol offset limit

2017-06-16 Thread Nathan Sidwell

On 06/15/2017 03:52 PM, Joseph Myers wrote:


For example, given (array + (i - INT_MAX)), it's quite possible the
compiler could create a relocation for array - INT_MAX, and the original
expression is perfectly OK if i == INT_MAX even though array - INT_MAX is
far out of range.  (And array - INT_MAX as a C expression is only
undefined at runtime, not at compile time if it's in code that is never
executed.)


Some targets (typically uclinux-like things) cannot support this, as 
they move the data segment relative to the text segment upon loading, 
and need to know whether an address is text-relative or data-relative 
(and symbol information may not be available).


There's a target hook for that, but I can't find it now.

nathan

--
Nathan Sidwell


Re: [patch, libfortran] Speed up cshift for dim > 1

2017-06-16 Thread Dominique d'Humières
Hi Thomas,

Your patch works as advertised! For the record, the following CSHIFT is still 4 
times slower than the DO loop

td(:,:) = cshift(array=t(:,:), shift=vect(:), dim=1)

Thanks for working on this issue.

Dominique



Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Richard Biener
On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng  wrote:
> On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
>  wrote:
>> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng  wrote:
>>> Hi,
>>> Loop split forces intermediate computation to gimple operands all the time 
>>> when
>>> computing bound information.  This is not good since folding opportunities 
>>> are
>>> missed.  This patch fixes the issue by feeding all computation to folder 
>>> and only
>>> forcing to gimple operand at last.
>>>
>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>
>> Hm?  It uses gimple_build () which should do the same as fold_buildN in terms
>> of simplification.
>>
>> So where does that not work?  It is supposed to be the preferred way and no
>> new code should use force_gimple_operand (unless dealing with generic
>> coming from other middle-end infrastructure like SCEV or niter analysis)
> Hmm, current code calls force_gimple_operand several times which
> causes the inefficiency.  The patch avoids that and does one call at
> the end.

But it forces to the same sequence that is used for extending the expression
so folding should work.  Where do you see that it does not?  Note the
code uses gimple_build (), not gimple_build_assign ().

Richard.

> Thanks,
> bin
>>
>> Richard.
>>
>>>
>>> Thanks,
>>> bin
>>> 2017-06-12  Bin Cheng  
>>>
>>> * tree-ssa-loop-split.c (compute_new_first_bound): Feed bound
>>> computation to folder, rather than force to gimple operands too
>>> early.


[C++ PATCH] deferred noexcept spec

2017-06-16 Thread Nathan Sidwell
We defer creating the deferred noexcept spec.  That's somewhat more 
deferred than necessary.  This patch creates the deferred spec at the 
point we create the noexcept true and false specs.


Applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	Don't defer noexcept_deferred_spec.
	* cp-tree.h (unevaluated_noexcept_spec): Don't declare.
	(cxx_init_decl_processing): Initialize noexcept_deferred_spec.
	* except.c (unevaluated_noexcept_spec): Delete.
	* class.c (deduce_noexcept_on_destructor): Use
	noexcept_deferred_spec directly.
	* method.c (implicitly_declare_fn): Likewise.

Index: class.c
===
--- class.c	(revision 249263)
+++ class.c	(working copy)
@@ -5025,10 +5025,8 @@ void
 deduce_noexcept_on_destructor (tree dtor)
 {
   if (!TYPE_RAISES_EXCEPTIONS (TREE_TYPE (dtor)))
-{
-  tree eh_spec = unevaluated_noexcept_spec ();
-  TREE_TYPE (dtor) = build_exception_variant (TREE_TYPE (dtor), eh_spec);
-}
+TREE_TYPE (dtor) = build_exception_variant (TREE_TYPE (dtor),
+		noexcept_deferred_spec);
 }
 
 /* For each destructor in T, deduce noexcept:
Index: cp-tree.h
===
--- cp-tree.h	(revision 249263)
+++ cp-tree.h	(working copy)
@@ -6317,7 +6317,6 @@ extern tree forward_parm			(tree);
 extern bool is_trivially_xible			(enum tree_code, tree, tree);
 extern bool is_xible(enum tree_code, tree, tree);
 extern tree get_defaulted_eh_spec		(tree);
-extern tree unevaluated_noexcept_spec		(void);
 extern void after_nsdmi_defaulted_late_checks   (tree);
 extern bool maybe_explain_implicit_delete	(tree);
 extern void explain_implicit_non_constexpr	(tree);
Index: decl.c
===
--- decl.c	(revision 249263)
+++ decl.c	(working copy)
@@ -4078,6 +4078,8 @@ cxx_init_decl_processing (void)
   empty_except_spec = build_tree_list (NULL_TREE, NULL_TREE);
   noexcept_true_spec = build_tree_list (boolean_true_node, NULL_TREE);
   noexcept_false_spec = build_tree_list (boolean_false_node, NULL_TREE);
+  noexcept_deferred_spec = build_tree_list (make_node (DEFERRED_NOEXCEPT),
+	NULL_TREE);
 
 #if 0
   record_builtin_type (RID_MAX, NULL, string_type_node);
Index: except.c
===
--- except.c	(revision 249255)
+++ except.c	(working copy)
@@ -1197,18 +1197,6 @@ build_noexcept_spec (tree expr, int comp
 }
 }
 
-/* Returns a noexcept-specifier to be evaluated later, for an
-   implicitly-declared or explicitly defaulted special member function.  */
-
-tree
-unevaluated_noexcept_spec (void)
-{
-  if (!noexcept_deferred_spec)
-noexcept_deferred_spec
-  = build_noexcept_spec (make_node (DEFERRED_NOEXCEPT), tf_none);
-  return noexcept_deferred_spec;
-}
-
 /* Returns a TRY_CATCH_EXPR that will put TRY_LIST and CATCH_LIST in the
TRY and CATCH locations.  CATCH_LIST must be a STATEMENT_LIST */
 
Index: method.c
===
--- method.c	(revision 249255)
+++ method.c	(working copy)
@@ -2023,7 +2023,7 @@ implicitly_declare_fn (special_function_
 }
   else if (cxx_dialect >= cxx11)
 {
-  raises = unevaluated_noexcept_spec ();
+  raises = noexcept_deferred_spec;
   synthesized_method_walk (type, kind, const_p, NULL, _p,
 			   _p, _p, false,
 			   inherited_ctor, inherited_parms);


Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Bin.Cheng
On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
 wrote:
> On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng  wrote:
>> On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
>>  wrote:
>>> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng  wrote:
 Hi,
 Loop split forces intermediate computation to gimple operands all the time 
 when
 computing bound information.  This is not good since folding opportunities 
 are
 missed.  This patch fixes the issue by feeding all computation to folder 
 and only
 forcing to gimple operand at last.

 Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>> Hm?  It uses gimple_build () which should do the same as fold_buildN in 
>>> terms
>>> of simplification.
>>>
>>> So where does that not work?  It is supposed to be the preferred way and no
>>> new code should use force_gimple_operand (unless dealing with generic
>>> coming from other middle-end infrastructure like SCEV or niter analysis)
>> Hmm, current code calls force_gimple_operand several times which
>> causes the inefficiency.  The patch avoids that and does one call at
>> the end.
>
> But it forces to the same sequence that is used for extending the expression
> so folding should work.  Where do you see that it does not?  Note the
> code uses gimple_build (), not gimple_build_assign ().
In spec2k6/hmmer, when building fast_algorithms.c with below command line:
./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
-fdump-tree-lsplit
The lsplit dump contains:
   [12.75%]:
  _124 = _197 + 1;
  _123 = _124 + -1;
  _115 = MIN_EXPR <_197, _124>;
Which is generated here.

Thanks,
bin
>
> Richard.
>
>> Thanks,
>> bin
>>>
>>> Richard.
>>>

 Thanks,
 bin
 2017-06-12  Bin Cheng  

 * tree-ssa-loop-split.c (compute_new_first_bound): Feed bound
 computation to folder, rather than force to gimple operands too
 early.


Re: [PATCH] PR libstdc++/81092 add std::wstring symbols and bump library version

2017-06-16 Thread Jonathan Wakely

On 16/06/17 16:20 +0200, Jakub Jelinek wrote:

On Wed, Jun 14, 2017 at 07:13:05PM +0100, Jonathan Wakely wrote:

Target maintainers will need to regenerate the baseline symbols on
gcc-7-branch and trunk.

I intend to commit this tomorrow to gcc-7-branch.





commit 425e1d784d9085f0f8d58ef6a381cc73d564f2f5
Author: Jonathan Wakely 
Date:   Wed Jun 14 14:06:40 2017 +0100

PR libstdc++/81092 add std::wstring symbols and bump library version

PR libstdc++/81092
* acinclude.m4: Bump libtool_VERSION.
* config/abi/post/i386-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/x86_64-linux-gnu/32/baseline_symbols.txt: Update.


Here is corresponding patch for i486 (which is just a copy of i386), ok
for gcc-7-branch?


OK, thanks.




Re: [PATCH] PR libstdc++/81092 add std::wstring symbols and bump library version

2017-06-16 Thread Jonathan Wakely

On 16/06/17 16:21 +0200, Jakub Jelinek wrote:

On Wed, Jun 14, 2017 at 07:14:29PM +0100, Jonathan Wakely wrote:

On 14/06/17 19:13 +0100, Jonathan Wakely wrote:
> There are two symbols defined in GCC 7.1's libstdc++.6.0.23 library
> which are not exported on all targets (because I wrote "m" in the
> linker script instead of "[jmy]").
>
> This patch bumps the library version on gcc-7-branch to 6.0.24 and
> exports the "[jy]" versions of the symbols with the new
> GLIBCXX_3.4.24 symbol version.
>
> This requires bumping the version on trunk to 6.0.25 and moving the
> new random_device::_M_get_entropy() symbol to GLIBCXX_3.4.25 (which
> will be done by the patch in the following mail).

Here's the patch for trunk.

Target maintainers will need to regenerate the baseline symbols on
gcc-7-branch and trunk.

I intend to commit this tomorrow to trunk.






commit 9354adee0dccc3ebc2aff07c3bdac3eb8ccdbda8
Author: Jonathan Wakely 
Date:   Wed Jun 14 14:06:40 2017 +0100

PR libstdc++/81092 add std::wstring symbols and bump library version

PR libstdc++/81092
* acinclude.m4: Bump libtool_VERSION.
* config/abi/post/i386-linux-gnu/baseline_symbols.txt: Update.
* config/abi/post/x86_64-linux-gnu/32/baseline_symbols.txt: Update.


Here is a corresponding patch for i486 and powerpc 32-bit, ok for trunk?


OK, thanks.



[C++ PATCH] symbol tables don't delete

2017-06-16 Thread Nathan Sidwell
We don't need to delete entries from a symbol table, so there's no point 
providing that functionality.


Applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	Symbol tables are insert only.
	* cp-tree.h (default_hash_traits ): Don't
	derive from pointer_hash.  Make undeletable.

Index: cp-tree.h
===
--- cp-tree.h	(revision 249264)
+++ cp-tree.h	(working copy)
@@ -550,7 +550,7 @@ identifier_p (tree t)
 
 template <>
 struct default_hash_traits 
-  : pointer_hash , ggc_remove 
+  : pointer_hash 
 {
   /* Use a regular tree as the type, to make using the hash table
  simpler.  We'll get dynamic type checking with the hash function
@@ -558,10 +558,14 @@ struct default_hash_traits 

Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Bin.Cheng
On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
 wrote:
> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng  wrote:
>> Hi,
>> Loop split forces intermediate computation to gimple operands all the time 
>> when
>> computing bound information.  This is not good since folding opportunities 
>> are
>> missed.  This patch fixes the issue by feeding all computation to folder and 
>> only
>> forcing to gimple operand at last.
>>
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Hm?  It uses gimple_build () which should do the same as fold_buildN in terms
> of simplification.
>
> So where does that not work?  It is supposed to be the preferred way and no
> new code should use force_gimple_operand (unless dealing with generic
> coming from other middle-end infrastructure like SCEV or niter analysis)
Hmm, current code calls force_gimple_operand several times which
causes the inefficiency.  The patch avoids that and does one call at
the end.

Thanks,
bin
>
> Richard.
>
>>
>> Thanks,
>> bin
>> 2017-06-12  Bin Cheng  
>>
>> * tree-ssa-loop-split.c (compute_new_first_bound): Feed bound
>> computation to folder, rather than force to gimple operands too
>> early.


Re: [PATCH, ARM] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature macro

2017-06-16 Thread Prakhar Bahuguna
Patch updated with code style fixes.

-- 

Prakhar Bahuguna
>From f1d76a6309a1fe16711b800507938eaa4f78852e Mon Sep 17 00:00:00 2001
From: Prakhar Bahuguna 
Date: Tue, 2 May 2017 13:43:40 +0100
Subject: [PATCH] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature
 macro

---
 gcc/config/arm/arm-c.c| 17 +
 gcc/testsuite/gcc.target/arm/acle/cdp.c   |  3 +++
 gcc/testsuite/gcc.target/arm/acle/cdp2.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/ldc.c   |  3 +++
 gcc/testsuite/gcc.target/arm/acle/ldc2.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/ldc2l.c |  3 +++
 gcc/testsuite/gcc.target/arm/acle/ldcl.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mcr.c   |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mcr2.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mcrr.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mcrr2.c |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mrc.c   |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mrc2.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mrrc.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/mrrc2.c |  3 +++
 gcc/testsuite/gcc.target/arm/acle/stc.c   |  3 +++
 gcc/testsuite/gcc.target/arm/acle/stc2.c  |  3 +++
 gcc/testsuite/gcc.target/arm/acle/stc2l.c |  3 +++
 gcc/testsuite/gcc.target/arm/acle/stcl.c  |  3 +++
 19 files changed, 71 insertions(+)

diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 3abe7d1f1f5..d8b17ffdccc 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -200,6 +200,23 @@ arm_cpu_builtins (struct cpp_reader* pfile)
   def_or_undef_macro (pfile, "__ARM_FEATURE_IDIV", TARGET_IDIV);
 
   def_or_undef_macro (pfile, "__ARM_ASM_SYNTAX_UNIFIED__", inline_asm_unified);
+
+  if ((!TARGET_THUMB || TARGET_THUMB2) && arm_arch4
+  && !(arm_arch8 && arm_arch_notm))
+{
+  int coproc_level = 0x1;
+
+  if (arm_arch5)
+   coproc_level |= 0x2;
+  if (arm_arch5e)
+   coproc_level |= 0x4;
+  if (arm_arch6)
+   coproc_level |= 0x8;
+
+  builtin_define_with_int_value ("__ARM_FEATURE_COPROC", coproc_level);
+}
+  else
+  cpp_undef (pfile, "__ARM_FEATURE_COPROC");
 }
 
 void
diff --git a/gcc/testsuite/gcc.target/arm/acle/cdp.c 
b/gcc/testsuite/gcc.target/arm/acle/cdp.c
index 28b218e7cfc..cebd8c4024e 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cdp.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cdp.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc1_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x1) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 void test_cdp (void)
 {
diff --git a/gcc/testsuite/gcc.target/arm/acle/cdp2.c 
b/gcc/testsuite/gcc.target/arm/acle/cdp2.c
index 00bcd502b56..945d435d2fb 100644
--- a/gcc/testsuite/gcc.target/arm/acle/cdp2.c
+++ b/gcc/testsuite/gcc.target/arm/acle/cdp2.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc2_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x2) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 void test_cdp2 (void)
 {
diff --git a/gcc/testsuite/gcc.target/arm/acle/ldc.c 
b/gcc/testsuite/gcc.target/arm/acle/ldc.c
index f45f25d8c97..cd57343208f 100644
--- a/gcc/testsuite/gcc.target/arm/acle/ldc.c
+++ b/gcc/testsuite/gcc.target/arm/acle/ldc.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc1_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x1) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 extern void * p;
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/ldc2.c 
b/gcc/testsuite/gcc.target/arm/acle/ldc2.c
index 433bf8a1204..d7691e30d76 100644
--- a/gcc/testsuite/gcc.target/arm/acle/ldc2.c
+++ b/gcc/testsuite/gcc.target/arm/acle/ldc2.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc2_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x2) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 extern void * p;
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/ldc2l.c 
b/gcc/testsuite/gcc.target/arm/acle/ldc2l.c
index 88c8aa44765..9ee63afa055 100644
--- a/gcc/testsuite/gcc.target/arm/acle/ldc2l.c
+++ b/gcc/testsuite/gcc.target/arm/acle/ldc2l.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc2_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x2) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 extern void * p;
 
diff --git a/gcc/testsuite/gcc.target/arm/acle/ldcl.c 
b/gcc/testsuite/gcc.target/arm/acle/ldcl.c
index 72a97f1d7b7..a6bfd9011dc 100644
--- a/gcc/testsuite/gcc.target/arm/acle/ldcl.c
+++ b/gcc/testsuite/gcc.target/arm/acle/ldcl.c
@@ -5,6 +5,9 @@
 /* { dg-require-effective-target arm_coproc1_ok } */
 
 #include "arm_acle.h"
+#if (__ARM_FEATURE_COPROC & 0x1) == 0
+  #error "__ARM_FEATURE_COPROC does not have correct feature bits set"
+#endif
 
 extern void * p;
 
diff --git 

[PATCH] Fix std::wbuffer_convert::sync()

2017-06-16 Thread Jonathan Wakely

The bug I recently fixed in wbuffer_convert::_M_put isn't the only problem in
that class; I also messed up the condition in wbuffer_convert::sync.

* include/bits/locale_conv.h (wbuffer_convert::sync): Fix condition.
* testsuite/22_locale/conversions/buffer/2.cc: New.

Tested powerpc64le-linux, committed to trunk.

I'm also testing this for the branches and will fix it there too.

commit 02d02f35ce5c96136edf8bb8ec8035b236303079
Author: Jonathan Wakely 
Date:   Fri Jun 16 13:59:43 2017 +0100

Fix std::wbuffer_convert::sync()

* include/bits/locale_conv.h (wbuffer_convert::sync): Fix condition.
* testsuite/22_locale/conversions/buffer/2.cc: New.

diff --git a/libstdc++-v3/include/bits/locale_conv.h 
b/libstdc++-v3/include/bits/locale_conv.h
index 74beaec..47c8dee 100644
--- a/libstdc++-v3/include/bits/locale_conv.h
+++ b/libstdc++-v3/include/bits/locale_conv.h
@@ -375,7 +375,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
 protected:
   int
   sync()
-  { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; }
+  { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
 
   typename _Wide_streambuf::int_type
   overflow(typename _Wide_streambuf::int_type __out)
diff --git a/libstdc++-v3/testsuite/22_locale/conversions/buffer/2.cc 
b/libstdc++-v3/testsuite/22_locale/conversions/buffer/2.cc
new file mode 100644
index 000..8eda714
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/conversions/buffer/2.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do run { target c++11 } }
+
+#include 
+#include 
+#include 
+
+void
+test01()
+{
+  struct Cvt : std::codecvt { };
+  std::stringstream ss;
+  std::wbuffer_convert cvt(ss.rdbuf());
+  auto p = ss.std::ios::rdbuf();
+  ss << "hello";
+  VERIFY( ss.flush().good() );
+  ss.std::ios::rdbuf(p);
+}
+
+int main()
+{
+  test01();
+}


[C++ Patch] PR 64644 (""warning: anonymous union with no members" should be an error with -pedantic-errors")

2017-06-16 Thread Paolo Carlini

Hi,

submitter and Manuel analyzed this a while ago and came to the 
conclusion - which I think is still valid vs the current working draft - 
that strictly speaking this kind of code violates [dcl.dcl], thus a 
pedwarn seems more suited than a plain warning. The below one-liner, 
suggested by Manuel at the time, passes testing on x86_64-linux together 
with my testsuite changes.


Thanks,

Paolo.

//

/cp
2017-06-16  Manuel Lopez-Ibanez  
Paolo Carlini  

PR c++/64644
* decl2.c (finish_anon_union): Complain about "anonymous union with
no members" with a pedwarn.

/testsuite
2017-06-16  Manuel Lopez-Ibanez  
Paolo Carlini  

PR c++/64644
* g++.dg/other/anon-union4.C: New.
* g++.old-deja/g++.law/union4.C: Adjust.
* g++.old-deja/g++.other/anon1.C: Likewise.
Index: cp/decl2.c
===
--- cp/decl2.c  (revision 249233)
+++ cp/decl2.c  (working copy)
@@ -1559,7 +1559,7 @@ finish_anon_union (tree anon_union_decl)
 return;
   if (main_decl == NULL_TREE)
 {
-  warning (0, "anonymous union with no members");
+  pedwarn (input_location, 0, "anonymous union with no members");
   return;
 }
 
Index: testsuite/g++.dg/other/anon-union4.C
===
--- testsuite/g++.dg/other/anon-union4.C(revision 0)
+++ testsuite/g++.dg/other/anon-union4.C(working copy)
@@ -0,0 +1,3 @@
+// PR c++/64644
+
+static union { };  // { dg-error "anonymous union with no members" }
Index: testsuite/g++.old-deja/g++.law/union4.C
===
--- testsuite/g++.old-deja/g++.law/union4.C (revision 249233)
+++ testsuite/g++.old-deja/g++.law/union4.C (working copy)
@@ -10,4 +10,4 @@ static union {
 struct SS {
 int ss;
 };
-};// { dg-warning "no members" }
+};// { dg-error "no members" }
Index: testsuite/g++.old-deja/g++.other/anon1.C
===
--- testsuite/g++.old-deja/g++.other/anon1.C(revision 249233)
+++ testsuite/g++.old-deja/g++.other/anon1.C(working copy)
@@ -3,4 +3,4 @@
 static union {
   union {
   };
-}; // { dg-warning "" } anonymous union with no members
+}; // { dg-error "" } anonymous union with no members


Re: [PATCH] PR libstdc++/81092 add std::wstring symbols and bump library version

2017-06-16 Thread Jakub Jelinek
On Wed, Jun 14, 2017 at 07:13:05PM +0100, Jonathan Wakely wrote:
> Target maintainers will need to regenerate the baseline symbols on
> gcc-7-branch and trunk.
> 
> I intend to commit this tomorrow to gcc-7-branch.
> 
> 

> commit 425e1d784d9085f0f8d58ef6a381cc73d564f2f5
> Author: Jonathan Wakely 
> Date:   Wed Jun 14 14:06:40 2017 +0100
> 
> PR libstdc++/81092 add std::wstring symbols and bump library version
> 
>   PR libstdc++/81092
>   * acinclude.m4: Bump libtool_VERSION.
>   * config/abi/post/i386-linux-gnu/baseline_symbols.txt: Update.
>   * config/abi/post/x86_64-linux-gnu/32/baseline_symbols.txt: Update.

Here is the corresponding patch for i486 (which is just a copy of i386), ok
for gcc-7-branch?

2017-06-16  Jakub Jelinek  

PR libstdc++/81092
* config/abi/post/i486-linux-gnu/baseline_symbols.txt: Update.

--- libstdc++-v3/config/abi/post/i486-linux-gnu/baseline_symbols.txt.jj 
2017-04-20 12:18:56.748224426 +0200
+++ libstdc++-v3/config/abi/post/i486-linux-gnu/baseline_symbols.txt
2017-06-16 16:08:30.187489797 +0200
@@ -1329,6 +1329,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EP
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1EPKwjRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jRKS1_@@GLIBCXX_3.4.24
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ERKS2_jjRKS1_@@GLIBCXX_3.4
 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC1ESt16initializer_listIwERKS1_@@GLIBCXX_3.4.11
@@ -1342,6 +1343,7 @@ FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2EP
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2EPKwjRKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS1_@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_@@GLIBCXX_3.4
+FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jRKS1_@@GLIBCXX_3.4.24
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jj@@GLIBCXX_3.4
 FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ERKS2_jjRKS1_@@GLIBCXX_3.4
 
FUNC:_ZNSbIwSt11char_traitsIwESaIwEEC2ESt16initializer_listIwERKS1_@@GLIBCXX_3.4.11
@@ -4002,6 +4004,7 @@ OBJECT:0:GLIBCXX_3.4.20
 OBJECT:0:GLIBCXX_3.4.21
 OBJECT:0:GLIBCXX_3.4.22
 OBJECT:0:GLIBCXX_3.4.23
+OBJECT:0:GLIBCXX_3.4.24
 OBJECT:0:GLIBCXX_3.4.3
 OBJECT:0:GLIBCXX_3.4.4
 OBJECT:0:GLIBCXX_3.4.5


Jakub


[C++ PATCH] Lazier rtti

2017-06-16 Thread Nathan Sidwell
The rtti descriptor classes and associated variables are constructed as 
needed during compilation.  On the modules branch I found it best to 
write them by special reference and reconstruct them on read back.  That 
meant some changes to how rtti currently handles them.  This patch 
implements most of those changes, which are useful in their own right.


Internally we use an enumeration to identify the different 
__FOO_type_info classes, and create pseudo versions that don't escape 
the compilation.  These match definitions in the cxxabi.h header file. 
However, we were tangling up 'tell me the index for the typeinfo 
container for type X', with 'construct the typeinfo container for type 
X'.  This patch cleanly separates those questions (between which I can 
insert module streaming).


The typeinfo construction now lazily constructs all these descriptor 
types.  It removes a hole in the enumeration (there are no VMI 
descriptors with zero bases).  Also, when looking to see if we're 
building the runtime (needed to enable a piece of internal magic), we no 
longer use xref_tag, which will construct an incomplete type.  Instead 
use regular qualified lookup in the ABI namespace.


Applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	Make rtti lazier
	* rtti.c (enum tinfo_kind): Add TK_DERIVED_TYPES,
	TK_VMI_CLASS_TYPES, TK_MAX.  Delete TK_FIXED.
	(tinfo_names): New.
	(typeid_ok_p): Add quotes to error messages.  Use get_tinfo_desc.
	(get_tinfo_decl): Use get_tinfo_desc.
	(get_pseudo_ti_init): Likewise. Adjust VMI construction.
	(create_pseudo_type_info): Delete.
	(get_pseudo_ti_index): Just determine the index.
	(get_tinfo_desc): New.  Create all types lazily.
	(create_tinfo_types): Just allocate the descriptor array.
	(emit_support_tinfos): Use non-inserting type lookup.  Set builtin
	location.

Index: rtti.c
===
--- rtti.c	(revision 249255)
+++ rtti.c	(working copy)
@@ -75,7 +75,8 @@ enum tinfo_kind
 {
   TK_TYPE_INFO_TYPE,/* abi::__type_info_pseudo */
   TK_BASE_TYPE,		/* abi::__base_class_type_info */
-  TK_BUILTIN_TYPE,	/* abi::__fundamental_type_info */
+  TK_DERIVED_TYPES,	/* Start of types derived from abi::__type_info  */
+  TK_BUILTIN_TYPE = TK_DERIVED_TYPES,	/* abi::__fundamental_type_info */
   TK_ARRAY_TYPE,	/* abi::__array_type_info */
   TK_FUNCTION_TYPE,	/* abi::__function_type_info */
   TK_ENUMERAL_TYPE,	/* abi::__enum_type_info */
@@ -83,8 +84,26 @@ enum tinfo_kind
   TK_POINTER_MEMBER_TYPE, /* abi::__pointer_to_member_type_info */
   TK_CLASS_TYPE,	/* abi::__class_type_info */
   TK_SI_CLASS_TYPE,	/* abi::__si_class_type_info */
-  TK_FIXED		/* end of fixed descriptors. */
-  /* ...		   abi::__vmi_type_info */
+  TK_VMI_CLASS_TYPES,	/* abi::__vmi_class_type_info */
+  TK_MAX
+};
+
+/* Names of the tinfo types.  Must be same order as TK enumeration
+   above.  */
+
+static const char *const tinfo_names[TK_MAX] =
+{
+  "__type_info",
+  "__base_class_type_info",
+  "__fundamental_type_info",
+  "__array_type_info",
+  "__function_type_info",
+  "__enum_type_info",
+  "__pointer_type_info",
+  "__pointer_to_member_type_info",
+  "__class_type_info",
+  "__si_class_type_info",
+  "__vmi_class_type_info"
 };
 
 /* Helper macro to get maximum scalar-width of pointer or of the 'long'-type.
@@ -115,9 +134,9 @@ static tree generic_initializer (tinfo_s
 static tree ptr_initializer (tinfo_s *, tree);
 static tree ptm_initializer (tinfo_s *, tree);
 static tree class_initializer (tinfo_s *, tree, unsigned, ...);
-static void create_pseudo_type_info (int, const char *, ...);
 static tree get_pseudo_ti_init (tree, unsigned);
 static unsigned get_pseudo_ti_index (tree);
+static tinfo_s *get_tinfo_desc (unsigned);
 static void create_tinfo_types (void);
 static bool typeinfo_in_lib_p (tree);
 
@@ -289,30 +308,27 @@ get_tinfo_decl_dynamic (tree exp, tsubst
 static bool
 typeid_ok_p (void)
 {
-  tree pseudo_type_info, type_info_type;
-
   if (! flag_rtti)
 {
-  error ("cannot use typeid with -fno-rtti");
+  error ("cannot use %<typeid%> with -fno-rtti");
   return false;
 }
 
   if (!COMPLETE_TYPE_P (const_type_info_type_node))
 {
-  error ("must #include <typeinfo> before using typeid");
+  error ("must %<#include <typeinfo>%> before using %<typeid%>");
   return false;
 }
 
-  pseudo_type_info = (*tinfo_descs)[TK_TYPE_INFO_TYPE].type;
-  type_info_type = TYPE_MAIN_VARIANT (const_type_info_type_node);
+  tree pseudo = TYPE_MAIN_VARIANT (get_tinfo_desc (TK_TYPE_INFO_TYPE)->type);
+  tree real = TYPE_MAIN_VARIANT (const_type_info_type_node);
 
   /* Make sure abi::__type_info_pseudo has the same alias set
  as std::type_info.  */
-  if (! TYPE_ALIAS_SET_KNOWN_P (pseudo_type_info))
-TYPE_ALIAS_SET (pseudo_type_info) = get_alias_set (type_info_type);
+  if (! TYPE_ALIAS_SET_KNOWN_P (pseudo))
+TYPE_ALIAS_SET (pseudo) = get_alias_set (real);
   else
-gcc_assert 

[C++ PATCH] Keyed classes list

2017-06-16 Thread Nathan Sidwell
We need to keep a record of classes whose key-method has been emitted 
(or that lack one).  This patch changes that from a TREE_LIST to a vector.


The final processing has an elaborate structure, trying to optimize the 
point at which we have to start stitching out classes from the list. 
This patch simply reverse iterates over the vector and uses 
unordered_remove.  Exactly like the next loop that does a similar 
processing for type info objects.


Applied to trunk (also separately committed the typo that crept into the 
previous rtti commit)


nathan
--
Nathan Sidwell
Index: class.c
===
--- class.c	(revision 249255)
+++ class.c	(working copy)
@@ -7189,8 +7189,8 @@ finish_struct_1 (tree t)
 	 in every translation unit where the class definition appears.  If
 	 we're devirtualizing, we can look into the vtable even if we
 	 aren't emitting it.  */
-  if (CLASSTYPE_KEY_METHOD (t) == NULL_TREE)
-	keyed_classes = tree_cons (NULL_TREE, t, keyed_classes);
+  if (!CLASSTYPE_KEY_METHOD (t))
+	vec_safe_push (keyed_classes, t);
 }
 
   /* Layout the class itself.  */
Index: cp-tree.h
===
--- cp-tree.h	(revision 249255)
+++ cp-tree.h	(working copy)
@@ -169,8 +169,6 @@ enum cp_tree_index
 CPTI_DSO_HANDLE,
 CPTI_DCAST,
 
-CPTI_KEYED_CLASSES,
-
 CPTI_NULLPTR,
 CPTI_NULLPTR_TYPE,
 
@@ -290,11 +288,6 @@ extern GTY(()) tree cp_global_trees[CPTI
destructors.  */
 #define vtt_parm_type			cp_global_trees[CPTI_VTT_PARM_TYPE]
 
-/* A TREE_LIST of the dynamic classes whose vtables may have to be
-   emitted in this translation unit.  */
-
-#define keyed_classes			cp_global_trees[CPTI_KEYED_CLASSES]
-
 /* A node which matches any template argument.  */
 #define any_targ_node			cp_global_trees[CPTI_ANY_TARG]
 
@@ -5107,6 +5100,10 @@ extern GTY(()) vec *local_c
 /* An array of static vars & fns.  */
 extern GTY(()) vec *static_decls;
 
+/* An array of vtable-needing types that have no key function, or have
+   an emitted key function.  */
+extern GTY(()) vec *keyed_classes;
+
 
 /* Here's where we control how name mangling takes place.  */
 
Index: decl.c
===
--- decl.c	(revision 249255)
+++ decl.c	(working copy)
@@ -160,6 +160,9 @@ tree integer_two_node;
 /* vector of static decls.  */
 vec *static_decls;
 
+/* vector of keyed classes.  */
+vec *keyed_classes;
+
 /* Used only for jumps to as-yet undefined labels, since jumps to
defined labels can have their validity checked immediately.  */
 
@@ -4064,6 +4067,9 @@ cxx_init_decl_processing (void)
   /* Guess at the initial static decls size.  */
   vec_alloc (static_decls, 500);
 
+  /* ... and keyed classes.  */
+  vec_alloc (keyed_classes, 100);
+
   record_builtin_type (RID_BOOL, "bool", boolean_type_node);
   truthvalue_type_node = boolean_type_node;
   truthvalue_false_node = boolean_false_node;
@@ -15438,7 +15444,7 @@ record_key_method_defined (tree fndecl)
 {
   tree fnclass = DECL_CONTEXT (fndecl);
   if (fndecl == CLASSTYPE_KEY_METHOD (fnclass))
-	keyed_classes = tree_cons (NULL_TREE, fnclass, keyed_classes);
+	vec_safe_push (keyed_classes, fnclass);
 }
 }
 
Index: decl2.c
===
--- decl2.c	(revision 249255)
+++ decl2.c	(working copy)
@@ -4511,37 +4511,18 @@ c_parse_final_cleanups (void)
   instantiate_pending_templates (retries);
   ggc_collect ();
 
-  /* Write out virtual tables as required.  Note that writing out
-	 the virtual table for a template class may cause the
+  /* Write out virtual tables as required.  Writing out the
+	 virtual table for a template class may cause the
 	 instantiation of members of that class.  If we write out
 	 vtables then we remove the class from our list so we don't
 	 have to look at it again.  */
-
-  while (keyed_classes != NULL_TREE
-	 && maybe_emit_vtables (TREE_VALUE (keyed_classes)))
-	{
-	  reconsider = true;
-	  keyed_classes = TREE_CHAIN (keyed_classes);
-	}
-
-  t = keyed_classes;
-  if (t != NULL_TREE)
-	{
-	  tree next = TREE_CHAIN (t);
-
-	  while (next)
-	{
-	  if (maybe_emit_vtables (TREE_VALUE (next)))
-		{
-		  reconsider = true;
-		  TREE_CHAIN (t) = TREE_CHAIN (next);
-		}
-	  else
-		t = next;
-
-	  next = TREE_CHAIN (t);
-	}
-	}
+  for (i = keyed_classes->length ();
+	   keyed_classes->iterate (--i, );)
+	if (maybe_emit_vtables (t))
+	  {
+	reconsider = true;
+	keyed_classes->unordered_remove (i);
+	  }
 
   /* Write out needed type info variables.  We have to be careful
 	 looping through unemitted decls, because emit_tinfo_decl may
Index: pt.c
===
--- pt.c	(revision 249255)
+++ pt.c	(working copy)
@@ -10864,9 

Re: [PATCH, ARM] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature macro

2017-06-16 Thread Richard Earnshaw (lists)
On 16/06/17 08:48, Prakhar Bahuguna wrote:
> On 15/06/2017 17:23:43, Richard Earnshaw (lists) wrote:
>> On 14/06/17 10:35, Prakhar Bahuguna wrote:
>>> The ARM ACLE defines the __ARM_FEATURE_COPROC macro which indicates which
>>> coprocessor intrinsics are available for the target. If 
>>> __ARM_FEATURE_COPROC is
>>> undefined, the target does not support coprocessor intrinsics. The feature
>>> levels are defined as follows:
>>>
>>> +-+---+--+
>>> | **Bit** | **Value** | **Intrinsics Available** |
>>> +-+---+--+
>>> | 0   | 0x1   | __arm_cdp __arm_ldc, __arm_ldcl, __arm_stc,  |
>>> | |   | __arm_stcl, __arm_mcr and __arm_mrc  |
>>> +-+---+--+
>>> | 1   | 0x2   | __arm_cdp2, __arm_ldc2, __arm_stc2, __arm_ldc2l, |
>>> | |   | __arm_stc2l, __arm_mcr2 and __arm_mrc2   |
>>> +-+---+--+
>>> | 2   | 0x4   | __arm_mcrr and __arm_mrrc|
>>> +-+---+--+
>>> | 3   | 0x8   | __arm_mcrr2 and __arm_mrrc2  |
>>> +-+---+--+
>>>
>>> This patch implements full support for this feature macro as defined in 
>>> section
>>> 5.9 of the ACLE
>>> (https://developer.arm.com/products/software-development-tools/compilers/arm-compiler-5/docs/101028/latest/5-feature-test-macros).
>>>
>>> gcc/ChangeLog:
>>>
>>> 2017-06-14  Prakhar Bahuguna  
>>>
>>> * config/arm/arm-c.c (arm_cpu_builtins): New block to define
>>>  __ARM_FEATURE_COPROC according to support.
>>>
>>> 2017-06-14  Prakhar Bahuguna  
>>> * gcc/testsuite/gcc.target/arm/acle/cdp.c: Add feature macro bitmap
>>> test.
>>> * gcc/testsuite/gcc.target/arm/acle/cdp2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/ldc.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/ldc2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/ldc2l.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/ldcl.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mcr.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mcr2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mcrr.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mcrr2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mrc.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mrc2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mrrc.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/mrrc2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/stc.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/stc2.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/stc2l.c: Likewise.
>>> * gcc/testsuite/gcc.target/arm/acle/stcl.c: Likewise.
>>>
>>> Testing done: ACLE regression tests updated with tests for feature macro 
>>> bits.
>>> All regression tests pass.
>>>
>>> Okay for trunk?
>>>
>>>
>>> 0001-Implement-__ARM_FEATURE_COPROC-coprocessor-intrinsic.patch
>>>
>>>
>>> From 79d71aec9d2bdee936b240ae49368ff5f8d8fc48 Mon Sep 17 00:00:00 2001
>>> From: Prakhar Bahuguna 
>>> Date: Tue, 2 May 2017 13:43:40 +0100
>>> Subject: [PATCH] Implement __ARM_FEATURE_COPROC coprocessor intrinsic 
>>> feature
>>>  macro
>>>
>>> ---
>>>  gcc/config/arm/arm-c.c| 19 +++
>>>  gcc/testsuite/gcc.target/arm/acle/cdp.c   |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/cdp2.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/ldc.c   |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/ldc2.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/ldc2l.c |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/ldcl.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mcr.c   |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mcr2.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mcrr.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mcrr2.c |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mrc.c   |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mrc2.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mrrc.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/mrrc2.c |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/stc.c   |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/stc2.c  |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/stc2l.c |  3 +++
>>>  gcc/testsuite/gcc.target/arm/acle/stcl.c  |  3 +++
>>>  19 files changed, 73 insertions(+)
>>>
>>> diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
>>> index 3abe7d1f1f5..3daf4e5e1f3 100644
>>> --- a/gcc/config/arm/arm-c.c
>>> +++ b/gcc/config/arm/arm-c.c
>>> @@ -200,6 +200,25 @@ arm_cpu_builtins (struct cpp_reader* 

[C++ PATCH] class method vector

2017-06-16 Thread Nathan Sidwell
The class method vector needs re-sorting on PCH read-in and module 
readback.  The code suffered from an unfortunate signed/unsigned 
comparison problem, which I hit.  I have reworked it to be safer.


Of course this thing should really be a hash table + vector of type 
conversions, but that's another story.


applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	* class.c (resort_type_method_vec): Avoid potential unsigned
	overflow.

Index: class.c
===
--- class.c	(revision 249264)
+++ class.c	(working copy)
@@ -2328,25 +2328,25 @@ resort_type_method_vec (void* obj,
 			gt_pointer_operator new_value,
 			void* cookie)
 {
-  vec *method_vec = (vec *) obj;
-  int len = vec_safe_length (method_vec);
-  size_t slot;
-  tree fn;
+  if (vec *method_vec = (vec *) obj)
+{
+  int len = method_vec->length ();
+  int slot;
 
-  /* The type conversion ops have to live at the front of the vec, so we
- can't sort them.  */
-  for (slot = CLASSTYPE_FIRST_CONVERSION_SLOT;
-   vec_safe_iterate (method_vec, slot, );
-   ++slot)
-if (!DECL_CONV_FN_P (OVL_FIRST (fn)))
-  break;
+  /* The type conversion ops have to live at the front of the vec, so we
+	 can't sort them.  */
+  for (slot = CLASSTYPE_FIRST_CONVERSION_SLOT;
+	   slot < len; slot++)
+	if (!DECL_CONV_FN_P (OVL_FIRST ((*method_vec)[slot])))
+	  break;
 
-  if (len - slot > 1)
-{
-  resort_data.new_value = new_value;
-  resort_data.cookie = cookie;
-  qsort (method_vec->address () + slot, len - slot, sizeof (tree),
-	 resort_method_name_cmp);
+  if (len > slot + 1)
+	{
+	  resort_data.new_value = new_value;
+	  resort_data.cookie = cookie;
+	  qsort (method_vec->address () + slot, len - slot, sizeof (tree),
+		 resort_method_name_cmp);
+	}
 }
 }
 


Re: [PATCH, ARM] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature macro

2017-06-16 Thread Prakhar Bahuguna
On 16/06/2017 15:37:18, Richard Earnshaw (lists) wrote:
> On 16/06/17 08:48, Prakhar Bahuguna wrote:
> > On 15/06/2017 17:23:43, Richard Earnshaw (lists) wrote:
> >> On 14/06/17 10:35, Prakhar Bahuguna wrote:
> >>> The ARM ACLE defines the __ARM_FEATURE_COPROC macro which indicates which
> >>> coprocessor intrinsics are available for the target. If 
> >>> __ARM_FEATURE_COPROC is
> >>> undefined, the target does not support coprocessor intrinsics. The feature
> >>> levels are defined as follows:
> >>>
> >>> +-+---+--+
> >>> | **Bit** | **Value** | **Intrinsics Available** |
> >>> +-+---+--+
> >>> | 0   | 0x1   | __arm_cdp __arm_ldc, __arm_ldcl, __arm_stc,  |
> >>> | |   | __arm_stcl, __arm_mcr and __arm_mrc  |
> >>> +-+---+--+
> >>> | 1   | 0x2   | __arm_cdp2, __arm_ldc2, __arm_stc2, __arm_ldc2l, |
> >>> | |   | __arm_stc2l, __arm_mcr2 and __arm_mrc2   |
> >>> +-+---+--+
> >>> | 2   | 0x4   | __arm_mcrr and __arm_mrrc|
> >>> +-+---+--+
> >>> | 3   | 0x8   | __arm_mcrr2 and __arm_mrrc2  |
> >>> +-+---+--+
> >>>
> >>> This patch implements full support for this feature macro as defined in 
> >>> section
> >>> 5.9 of the ACLE
> >>> (https://developer.arm.com/products/software-development-tools/compilers/arm-compiler-5/docs/101028/latest/5-feature-test-macros).
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>> 2017-06-14  Prakhar Bahuguna  
> >>>
> >>>   * config/arm/arm-c.c (arm_cpu_builtins): New block to define
> >>>__ARM_FEATURE_COPROC according to support.
> >>>
> >>> 2017-06-14  Prakhar Bahuguna  
> >>>   * gcc/testsuite/gcc.target/arm/acle/cdp.c: Add feature macro bitmap
> >>>   test.
> >>>   * gcc/testsuite/gcc.target/arm/acle/cdp2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/ldc.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/ldc2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/ldc2l.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/ldcl.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mcr.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mcr2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mcrr.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mcrr2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mrc.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mrc2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mrrc.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/mrrc2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/stc.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/stc2.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/stc2l.c: Likewise.
> >>>   * gcc/testsuite/gcc.target/arm/acle/stcl.c: Likewise.
> >>>
> >>> Testing done: ACLE regression tests updated with tests for feature macro 
> >>> bits.
> >>> All regression tests pass.
> >>>
> >>> Okay for trunk?
> >>>
> >>>
> >>> 0001-Implement-__ARM_FEATURE_COPROC-coprocessor-intrinsic.patch
> >>>
> >>>
> >>> From 79d71aec9d2bdee936b240ae49368ff5f8d8fc48 Mon Sep 17 00:00:00 2001
> >>> From: Prakhar Bahuguna 
> >>> Date: Tue, 2 May 2017 13:43:40 +0100
> >>> Subject: [PATCH] Implement __ARM_FEATURE_COPROC coprocessor intrinsic 
> >>> feature
> >>>  macro
> >>>
> >>> ---
> >>>  gcc/config/arm/arm-c.c| 19 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/cdp.c   |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/cdp2.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/ldc.c   |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/ldc2.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/ldc2l.c |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/ldcl.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mcr.c   |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mcr2.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mcrr.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mcrr2.c |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mrc.c   |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mrc2.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mrrc.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/mrrc2.c |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/stc.c   |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/stc2.c  |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/stc2l.c |  3 +++
> >>>  gcc/testsuite/gcc.target/arm/acle/stcl.c  |  3 +++
> >>>  19 files changed, 73 insertions(+)
> >>>
> >>> diff --git 

[C++ PATCH] PARM context

2017-06-16 Thread Nathan Sidwell
We weren't always setting DECL_CONTEXT for parm decls, which caused me 
some issues in the modules branch.  Fixed by making the callers of the 
parm-decl builders pass in the function decl.  I suspect the places in 
grokdeclarator dealing with declaring via typedef (where I pass NULL) will 
need addressing later on, but this seems a useful cleanup.


Applied to trunk.

nathan
--
Nathan Sidwell
2017-06-16  Nathan Sidwell  

	gcc/cp/
	* cp-tree.h (build_this_parm, cp_build_parm_decl,
	build_artificial_parm): Add FN parm.
	* decl.c (start_cleanup_fn): Adjust.
	(build_this_parm): Add FN parm, pass it through.
	(grokfndecl): Adjust parm building.
	* decl2.c (cp_build_parm_decl): Add FN parm, set context.
	(build_artificial_parm): Add FN parm, pass through.
	(maybe_retrofit_in_chrg): Adjust parm building.
	(start_static_storage_duration_function): Likewise.
	* lambda.c (maybe_add_lambda_conv_op): Likewise.
	* method.c (implicitly_declare_fn): Likewise.
	* parser.c (inject_this_parameter): Likewise.

	libcc1/
	* libcp1plugin.cc (plugin_build_decl): Adjust parm building.

Index: gcc/cp/cp-tree.h
===
--- gcc/cp/cp-tree.h	(revision 249266)
+++ gcc/cp/cp-tree.h	(working copy)
@@ -6078,7 +6078,7 @@ extern int cp_complete_array_type_or_err
 extern tree build_ptrmemfunc_type		(tree);
 extern tree build_ptrmem_type			(tree, tree);
 /* the grokdeclarator prototype is in decl.h */
-extern tree build_this_parm			(tree, cp_cv_quals);
+extern tree build_this_parm			(tree, tree, cp_cv_quals);
 extern tree grokparms(tree, tree *);
 extern int copy_fn_p(const_tree);
 extern bool move_fn_p   (const_tree);
@@ -6179,7 +6179,7 @@ extern void check_default_args			(tree);
 extern bool mark_used(tree);
 extern bool mark_used			(tree, tsubst_flags_t);
 extern void finish_static_data_member_decl	(tree, tree, bool, tree, int);
-extern tree cp_build_parm_decl			(tree, tree);
+extern tree cp_build_parm_decl			(tree, tree, tree);
 extern tree get_guard(tree);
 extern tree get_guard_cond			(tree, bool);
 extern tree set_guard(tree);
@@ -6188,7 +6188,7 @@ extern void mark_needed(tree);
 extern bool decl_needed_p			(tree);
 extern void note_vague_linkage_fn		(tree);
 extern void note_variable_template_instantiation (tree);
-extern tree build_artificial_parm		(tree, tree);
+extern tree build_artificial_parm		(tree, tree, tree);
 extern bool possibly_inlined_p			(tree);
 extern int parm_index   (tree);
 extern tree vtv_start_verification_constructor_init_function (void);
Index: gcc/cp/decl.c
===
--- gcc/cp/decl.c	(revision 249264)
+++ gcc/cp/decl.c	(working copy)
@@ -7823,10 +7823,7 @@ start_cleanup_fn (void)
   /* Build the parameter.  */
   if (use_cxa_atexit)
 {
-  tree parmdecl;
-
-  parmdecl = cp_build_parm_decl (NULL_TREE, ptr_type_node);
-  DECL_CONTEXT (parmdecl) = fndecl;
+  tree parmdecl = cp_build_parm_decl (fndecl, NULL_TREE, ptr_type_node);
   TREE_USED (parmdecl) = 1;
   DECL_READ_P (parmdecl) = 1;
   DECL_ARGUMENTS (fndecl) = parmdecl;
@@ -8367,12 +8364,12 @@ check_class_member_definition_namespace
 	   decl, DECL_CONTEXT (decl));
 }
 
-/* Build a PARM_DECL for the "this" parameter.  TYPE is the
+/* Build a PARM_DECL for the "this" parameter of FN.  TYPE is the
METHOD_TYPE for a non-static member function; QUALS are the
cv-qualifiers that apply to the function.  */
 
 tree
-build_this_parm (tree type, cp_cv_quals quals)
+build_this_parm (tree fn, tree type, cp_cv_quals quals)
 {
   tree this_type;
   tree qual_type;
@@ -8391,7 +8388,7 @@ build_this_parm (tree type, cp_cv_quals
  assigned to.  */
   this_quals = (quals & TYPE_QUAL_RESTRICT) | TYPE_QUAL_CONST;
   qual_type = cp_build_qualified_type (this_type, this_quals);
-  parm = build_artificial_parm (this_identifier, qual_type);
+  parm = build_artificial_parm (fn, this_identifier, qual_type);
   cp_apply_type_quals_to_decl (this_quals, parm);
   return parm;
 }
@@ -8525,8 +8522,7 @@ grokfndecl (tree ctype,
 
   if (TREE_CODE (type) == METHOD_TYPE)
 {
-  tree parm;
-  parm = build_this_parm (type, quals);
+  tree parm = build_this_parm (decl, type, quals);
   DECL_CHAIN (parm) = parms;
   parms = parm;
 
@@ -11625,7 +11621,8 @@ grokdeclarator (const cp_declarator *dec
 	   args && args != void_list_node;
 	   args = TREE_CHAIN (args))
 	{
-	  tree decl = cp_build_parm_decl (NULL_TREE, TREE_VALUE (args));
+	  tree decl = cp_build_parm_decl (NULL_TREE, NULL_TREE,
+	  TREE_VALUE (args));
 
 	  DECL_CHAIN (decl) = decls;
 	  decls = decl;
@@ -11794,7 +11791,7 @@ grokdeclarator (const cp_declarator *dec
 
 if (decl_context == PARM)
   {
-	decl = cp_build_parm_decl (unqualified_id, type);
+	decl = cp_build_parm_decl (NULL_TREE, unqualified_id, type);
 	DECL_ARRAY_PARAMETER_P (decl) = 

[PATCH] Fix PR71815 (SLSR misses PHI opportunities)

2017-06-16 Thread Bill Schmidt
Hi,

PR71815 identifies a situation where SLSR misses opportunities for 
PHI candidates when code hoisting is enabled (which is now on by
default).  The basic problem is that SLSR currently uses an overly
simple test for profitability of the transformation.  The algorithm
currently requires that the PHI basis (through which the non-local
SLSR candidate is propagated) has only one use, which is the
candidate statement.  The true requirement for profitability is
that, if the candidate statement will be dead after transformation,
then so will the PHI candidate.

This patch fixes the problem by looking at the transitive reachability
of the PHI definitions.  If all paths terminate in the candidate
statement, then we know the PHI basis will go dead and we will not
make the code worse with the planned replacement.  To avoid compile
time issues, path search is arbitrarily terminated at depth 10.  The
new test is used throughout the cost calculation, so appears multiple
times in the code.

Also, I've added a check to avoid replacing multiply candidates with
a stride of 1.  Such a candidate is really a copy or cast statement,
and if we replace it, we will just generate a different copy or cast
statement.  I noticed this with one of the test cases from the PR
while debugging the problem.

I've updated the two test cases that were previously enabled only
with -fno-code-hoisting, removing that restriction.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no
regressions.  I've also tested this with SPEC cpu2006 and the
patch is performance neutral on a POWER8 box (as expected).  Is
this ok for trunk?

Thanks,
Bill


[gcc]

2016-06-16  Bill Schmidt  

* gimple-ssa-strength-reduction.c (uses_consumed_by_stmt): New
function.
(find_basis_for_candidate): Call uses_consumed_by_stmt rather than
has_single_use.
(slsr_process_phi): Likewise.
(replace_uncond_cands_and_profitable_phis): Don't replace a
multiply candidate with a stride of 1 (copy or cast).
(phi_incr_cost): Call uses_consumed_by_stmt rather than
has_single_use.
(lowest_cost_path): Likewise.
(total_savings): Likewise.

[gcc/testsuite]

2016-06-16  Bill Schmidt  

* gcc.dg/tree-ssa/slsr-35.c: Remove -fno-code-hoisting workaround.
* gcc.dg/tree-ssa/slsr-36.c: Likewise.


Index: gcc/gimple-ssa-strength-reduction.c
===
--- gcc/gimple-ssa-strength-reduction.c (revision 239241)
+++ gcc/gimple-ssa-strength-reduction.c (working copy)
@@ -475,6 +475,48 @@ find_phi_def (tree base)
   return c->cand_num;
 }
 
+/* Determine whether all uses of NAME are directly or indirectly
+   used by STMT.  That is, we want to know whether if STMT goes
+   dead, the definition of NAME also goes dead.  RECURSE is the
+   current depth of the search, which gives up at depth 10.  */
+static bool
+uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse)
+{
+  gimple *use_stmt;
+  imm_use_iterator iter;
+  bool retval = true;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
+    {
+      if (use_stmt == stmt || is_gimple_debug (use_stmt))
+        continue;
+
+      /* Only follow assignments, and limit recursion.  */
+      if (!is_gimple_assign (use_stmt) || recurse >= 10)
+        {
+          retval = false;
+          BREAK_FROM_IMM_USE_STMT (iter);
+        }
+
+      /* The use must define a register whose uses we can follow.  */
+      tree next_name = gimple_get_lhs (use_stmt);
+      if (!next_name || !is_gimple_reg (next_name))
+        {
+          retval = false;
+          BREAK_FROM_IMM_USE_STMT (iter);
+        }
+
+      /* If any transitive use escapes STMT, NAME stays live.  */
+      if (!uses_consumed_by_stmt (next_name, stmt, recurse + 1))
+        {
+          retval = false;
+          BREAK_FROM_IMM_USE_STMT (iter);
+        }
+    }
+
+  return retval;
+}
+
 /* Helper routine for find_basis_for_candidate.  May be called twice:
once for the candidate's base expr, and optionally again either for
the candidate's phi definition or for a CAND_REF's alternative base
@@ -550,7 +592,8 @@ find_basis_for_candidate (slsr_cand_t c)
 
  /* If we found a hidden basis, estimate additional dead-code
 savings if the phi and its feeding statements can be removed.  */
- if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
+ tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
+ if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt, 0))
c->dead_savings += phi_cand->dead_savings;
}
 }
@@ -777,7 +820,7 @@ slsr_process_phi (gphi *phi, bool speed)
 
  /* Gather potential dead code savings if the phi statement
 can be removed later on.  */
- if (has_single_use (arg))
+ if (uses_consumed_by_stmt (arg, phi, 0))
{
  if (gimple_code (arg_stmt) == GIMPLE_PHI)
savings += arg_cand->dead_savings;
@@ -2384,7 +2427,9 @@ replace_uncond_cands_and_profitable_phis (slsr_can
 {
   if (phi_dependent_cand_p 

Re: [PATCH v4, rs6000] gcc mainline, add builtin support for vec_float, vec_float2, vec_floate, vec_floate, builtins

2017-06-16 Thread Carl Love

GCC Maintainers:

I have addressed the latest comments on the patch from Segher,
formatting issues and renaming the new define_mode_attr.  I believe I
have addressed all of the issues.  I have reviewed the patch for
formatting issues.

I retested the changes on powerpc64le-unknown-linux-gnu (Power 8 LE)
only. 

Please let me know if there are any additional issues that need fixing.
Thanks.

Carl Love



gcc/ChangeLog:

2017-06-16  Carl Love  

* config/rs6000/rs6000-c.c (altivec_overloaded_builtins[]): Add
definitions for vec_float, vec_float2, vec_floato,
vec_floate built-ins.
* config/rs6000/vsx.md (define_c_enum "unspec"): Add RTL code
for instructions vsx_xvcvsxws vsx_xvcvuxwsp, float2, floato and
floate.
* config/rs6000/rs6000-builtin.def (FLOAT2_V2DI, FLOATE_V2D*,
FLOATO_V2D*, XVCVSXWSP_V4SF, UNS_FLOATO_V2DI, UNS_FLOATE_V2DI): Add
definitions.
* config/rs6000/altivec.md (define_insn "p8_vmrgew_<mode>",
define_mode_attr VF_sxddp): Add V4SF type to p8_vmrgew.
* config/rs6000/altivec.h (vec_float, vec_float2, vec_floate,
vec_floato): Add builtin defines.
* doc/extend.texi (vec_float, vec_float2, vec_floate, vec_floato):
Update the built-in documentation file for the new built-in
functions.

gcc/testsuite/ChangeLog:

2017-06-16  Carl Love  

* gcc.target/powerpc/builtins-3-runnable.c (test_result_sp(),
main()): Add runnable tests and test checker for vec_float,
vec_float2, vec_floate and vec_floato builtins.
---
 gcc/config/rs6000/altivec.h|   4 +
 gcc/config/rs6000/altivec.md   |  17 ++-
 gcc/config/rs6000/rs6000-builtin.def   |  19 ++-
 gcc/config/rs6000/rs6000-c.c   |  28 +++-
 gcc/config/rs6000/rs6000-protos.h  |   1 +
 gcc/config/rs6000/rs6000.c |  45 +-
 gcc/config/rs6000/vsx.md   | 158 +
 gcc/doc/extend.texi|  14 ++
 .../gcc.target/powerpc/builtins-3-runnable.c   |  82 +++
 9 files changed, 356 insertions(+), 12 deletions(-)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 20050eb..d542315 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -133,6 +133,10 @@
 #define vec_doublel __builtin_vec_doublel
 #define vec_doubleh __builtin_vec_doubleh
 #define vec_expte __builtin_vec_expte
+#define vec_float __builtin_vec_float
+#define vec_float2 __builtin_vec_float2
+#define vec_floate __builtin_vec_floate
+#define vec_floato __builtin_vec_floato
 #define vec_floor __builtin_vec_floor
 #define vec_loge __builtin_vec_loge
 #define vec_madd __builtin_vec_madd
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 487b9a4..fd15286 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -207,6 +207,9 @@
 ;; versus floating point
 (define_mode_attr VS_sxwsp [(V4SI "sxw") (V4SF "sp")])
 
+;; Mode attribute for vector floate and floato conversions
+(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
+
 ;; Specific iterator for parity which does not have a byte/half-word form, but
 ;; does have a quad word form
 (define_mode_iterator VParity [V4SI
@@ -1316,13 +1319,13 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Power8 vector merge even/odd
-(define_insn "p8_vmrgew"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-   (vec_select:V4SI
- (vec_concat:V8SI
-   (match_operand:V4SI 1 "register_operand" "v")
-   (match_operand:V4SI 2 "register_operand" "v"))
+;; Power8 vector merge two V4SF/V4SI even words to V4SF
+(define_insn "p8_vmrgew_"
+  [(set (match_operand:VSX_W 0 "register_operand" "=v")
+   (vec_select:VSX_W
+ (vec_concat:
+   (match_operand:VSX_W 1 "register_operand" "v")
+   (match_operand:VSX_W 2 "register_operand" "v"))
  (parallel [(const_int 0) (const_int 4)
 (const_int 2) (const_int 6)])))]
   "TARGET_P8_VECTOR"
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 241c439..4682628 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1591,6 +1591,8 @@ BU_VSX_2 (CMPLE_U16QI,"cmple_u16qi",CONST,  
vector_ngtuv16qi)
 BU_VSX_2 (CMPLE_U8HI, "cmple_u8hi", CONST,  vector_ngtuv8hi)
 BU_VSX_2 (CMPLE_U4SI, "cmple_u4si", CONST,  vector_ngtuv4si)
 BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST,  vector_ngtuv2di)
+BU_VSX_2 (FLOAT2_V2DI,"float2_v2di",CONST,  float2_v2di)
+BU_VSX_2 (UNS_FLOAT2_V2DI,"uns_float2_v2di",CONST,  uns_float2_v2di)
 
 /* VSX abs builtin functions.  */
 BU_VSX_A (XVABSDP,   "xvabsdp",CONST,  

Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Bin.Cheng
On Fri, Jun 16, 2017 at 5:16 PM, Richard Biener
 wrote:
> On June 16, 2017 3:31:32 PM GMT+02:00, "Bin.Cheng"  
> wrote:
>>On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
>> wrote:
>>> On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng 
>>wrote:
 On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
  wrote:
> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng 
>>wrote:
>> Hi,
>> Loop split forces intermediate computation to gimple operands all
>>the time when
>> computing bound information.  This is not good since folding
>>opportunities are
>> missed.  This patch fixes the issue by feeding all computation to
>>folder and only
>> forcing to gimple operand at last.
>>
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> Hm?  It uses gimple_build () which should do the same as
>>fold_buildN in terms
> of simplification.
>
> So where does that not work?  It is supposed to be the preferred way
>>and no
> new code should use force_gimple_operand (unless dealing with
>>generic
> coming from other middle-end infrastructure like SCEV or niter
>>analysis)
 Hmm, current code calls force_gimple_operand several times which
 causes the inefficiency.  The patch avoids that and does one call at
 the end.
>>>
>>> But it forces to the same sequence that is used for extending the
>>expression
>>> so folding should work.  Where do you see that it does not?  Note the
>>> code uses gimple_build (), not gimple_build_assign ().
>>In spec2k6/hmmer, when building fast_algorithms.c with below command
>>line:
>>./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
>>-fdump-tree-lsplit
>>The lsplit dump contains:
>>   [12.75%]:
>>  _124 = _197 + 1;
>>  _123 = _124 + -1;
>>  _115 = MIN_EXPR <_197, _124>;
>>Which is generated here.
>
> That means we miss a pattern in match.PD to handle this case.
I see.  I will withdraw this patch and look in that direction.

Thanks,
bin
>
> Richard.
>
>>Thanks,
>>bin
>>>
>>> Richard.
>>>
 Thanks,
 bin
>
> Richard.
>
>>
>> Thanks,
>> bin
>> 2017-06-12  Bin Cheng  
>>
>> * tree-ssa-loop-split.c (compute_new_first_bound): Feed
>>bound
>> computation to folder, rather than force to gimple
>>operands too
>> early.
>


[PATCH][PR sanitizer/77631] Support separate debug info in libbacktrace

2017-06-16 Thread Denis Khalikov

Hello everyone,

This is a patch for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77631

Can someone please review the attached patch?

Thanks.
From ae74cf3d632b06a91a986e32e3a6c16223767b24 Mon Sep 17 00:00:00 2001
From: Denis Khalikov 
Date: Fri, 16 Jun 2017 12:13:13 +0300
Subject: [PATCH] PR sanitizer/77631

	* Makefile.in: Regenerated.
	* configure.ac: Add searching for limits.h, sys/param.h
	* config.h.in: Regenerated.
	* configure: Regenerated.
	* elf.c (enum type_of_file): New enum.
	(enum type_of_elf): New enum.
	(enum debug_path): New enum.
	(getl32): New function.
	(gnu_debuglink_crc32): New function. Generate crc32 sum.
	(get_crc32): New function.
	(pathlen): New function.
	(check_sum): New function. Verify sum.
	(process_elf_header): New function. Verify elf header.
	(elf_get_section_by_name): New function. Get section by name.
	(backtrace_readlink): New function. Get type of file from filename.
	(resolve_realname): New function. Resolve real name if file is link.
	(backtrace_resolve_realname): New function. Resolve real name for any
	file type.
	(search_for_debugfile): New function. Search for debug file in known
	paths.
	(open_debugfile_by_gnulink): New function. Open debug file with
	gnulink.
	(hex): New function. Convert to hex.
	(get_build_id_name): New function. Generate build-id name.
	(open_debugfile_by_build_id): New function. Open debug file with
	build-id.
	(backtrace_open_debugfile): New function. Open debug file.
	(elf_add): Move code which reads elf header to elf_header_is_valid.
	(phdr_callback): Call backtrace_open_debugfile function for shared
	library.
	* fileline.c (fileline_initialize): Call backtrace_open_debugfile for
	executable.
	* internal.h: Updated.
---
 libbacktrace/ChangeLog|  37 ++
 libbacktrace/Makefile.in  |   2 +-
 libbacktrace/config.h.in  |   6 +
 libbacktrace/configure|  26 ++
 libbacktrace/configure.ac |   4 +
 libbacktrace/elf.c| 949 +-
 libbacktrace/fileline.c   |  13 +-
 libbacktrace/internal.h   |   8 +
 8 files changed, 958 insertions(+), 87 deletions(-)

diff --git a/libbacktrace/ChangeLog b/libbacktrace/ChangeLog
index 096ceb6..4bd97f3 100644
--- a/libbacktrace/ChangeLog
+++ b/libbacktrace/ChangeLog
@@ -1,3 +1,40 @@
+2017-06-16  Denis Khalikov  
+
+	PR sanitizer/77631
+	* Makefile.in: Regenerated.
+	* configure.ac: Add searching for limits.h, sys/param.h
+	* config.h.in: Regenerated.
+	* configure: Regenerated.
+	* elf.c (enum type_of_file): New enum.
+	(enum type_of_elf): New enum.
+	(enum debug_path): New enum.
+	(getl32): New function.
+	(gnu_debuglink_crc32): New function. Generate crc32 sum.
+	(get_crc32): New function.
+	(pathlen): New function.
+	(check_sum): New function. Verify sum.
+	(process_elf_header): New function. Verify elf header.
+	(elf_get_section_by_name): New function. Get section by name.
+	(backtrace_readlink): New function. Get type of file from filename.
+	(resolve_realname): New function. Resolve real name if file is link.
+	(backtrace_resolve_realname): New function. Resolve real name for any
+	file type.
+	(search_for_debugfile): New function. Search for debug file in known
+	paths.
+	(open_debugfile_by_gnulink): New function. Open debug file with
+	gnulink.
+	(hex): New function. Convert to hex.
+	(get_build_id_name): New function. Generate build-id name.
+	(open_debugfile_by_build_id): New function. Open debug file with
+	build-id.
+	(backtrace_open_debugfile): New function. Open debug file.
+	(elf_add): Move code which reads elf header to elf_header_is_valid.
+	(phdr_callback): Call backtrace_open_debugfile function for shared
+	library.
+	* fileline.c (fileline_initialize): Call backtrace_open_debugfile for
+	executable.
+	* internal.h: Updated.
+
 2017-05-02  Release Manager
 
 	* GCC 7.1.0 released.
diff --git a/libbacktrace/Makefile.in b/libbacktrace/Makefile.in
index de74b5d..e604b6c 100644
--- a/libbacktrace/Makefile.in
+++ b/libbacktrace/Makefile.in
@@ -16,7 +16,7 @@
 @SET_MAKE@
 
 # Makefile.am -- Backtrace Makefile.
-# Copyright (C) 2012-2016 Free Software Foundation, Inc.
+# Copyright (C) 2012-2017 Free Software Foundation, Inc.
 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
diff --git a/libbacktrace/config.h.in b/libbacktrace/config.h.in
index 87cb805..edbd9af 100644
--- a/libbacktrace/config.h.in
+++ b/libbacktrace/config.h.in
@@ -28,6 +28,9 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
+/* Define 1 if <limits.h> is available. */
+#undef HAVE_LIMITS_H
+
 /* Define to 1 if you have the <link.h> header file. */
 #undef HAVE_LINK_H
 
@@ -52,6 +55,9 @@
 /* Define to 1 if you have the <sys/mman.h> header file. */
 #undef HAVE_SYS_MMAN_H
 
+/* Define 1 if <sys/param.h> is available. */
+#undef HAVE_SYS_PARAM_H
+
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #undef HAVE_SYS_STAT_H
 
diff --git 

Re: [Patch, ARM, testsuite] Add -mfloat-abi=hard to arm_neon_ok

2017-06-16 Thread Christophe Lyon
ping?

On 7 June 2017 at 11:13, Christophe Lyon  wrote:
> Hi,
>
>
> On 2 June 2017 at 16:19, Christophe Lyon  wrote:
>> Hi,
>>
>> I have recently updated the dejagnu version I use for
>> cross-testing arm and aarch64 toolchains to 1.6+. One of the side
>> effects was mentioned by Jonathan in
>> https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01267.html. Since I
>> use multilibs to test many configurations, I noticed several
>> changes in the results I get.
>>
>> In particular, on arm-none-linux-gnueabihf with -march=armv5t,
>> all the tests that require arm_neon_ok fail to compile because
>> they now use -march=armv5t -mfpu=neon -mfloat-abi=softfp
>> -march=armv7-a, which leads to a failure to include
>> gnu/stubs-soft.h (not present since the target is
>> 'hf'). Previously, -march=armv5t was appended, making the tests
>> unsupported because -mfpu=neon conflicts with -march=armv5t. Now,
>> arm_neon_ok succeeds because it only checks if some preprocessor
>> defines are present.
>>
>> This patch fixes that by including arm_neon.h in arm_neon_ok, such
>> that it fails for unsupported cases. However, since most of these
>> tests should pass instead of becoming unsupported, I have added flag
>> combinations with -mfloat-abi=hard.
>>
>> However, this is not sufficient to make the
>> gcc.target/arm/lto/pr65837* tests pass: they do not require
>> arm_neon_ok, and when I tried to add it, they still failed
>> because these lto tests do not support dg-add-options. My
>> proposal is to add a new
>> check_effective_target_arm_neon_ok_no_float_abi function which
>> tries to use neon without trying to change the -mfloat-abi
>> setting (that is, the same as arm_neon_ok, with only ""
>> and "-mfpu=neon" in the list of flags) . This makes these two lto
>> tests unsupported for non-hf targets (again because
>> gnu/stubs-soft.h is not present).
>>
>> To make them pass on "hf" targets:
>> - I added -mfpu=neon to dg-lto-options in pr65837-attr_0.c,
>>   because the fpu attributes in arm_neon.h only work if
>>   -mfpu=neon is enabled
>> - I removed dg-suppress-ld-options {-mfpu=neon} from pr65837_0.c,
>>   -mfpu=neon is needed for the test to compile with toolchains
>>   where the default fpu is not neon (eg vfpv3-d16-fp16)
>>
>> On arm-none-linux-gnueabihf --with-cpu=cortex-a9 --with-fpu=vfp
>> and multilib test flag=-march=armv5t, this patch brings:
>> - 2 UNRESOLVED -> FAIL (gcc.dg/vect/vect-align-1.c)
>> - 14 UNRESOLVED -> XPASS (in gcc.dg/vect/)
>> - 2765 new PASS
>> - 3639 FAIL -> PASS
>> - 1826 UNRESOLVED -> PASS
>> - 102 UNRESOLVED -> XFAIL
>>
>> as visible in the red cell at
>> http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/248552-gnu-stubs9.patch/report-build-info.html
>> (the build-failed line can be ignored, it was caused by a server
>> problem)
>>
>> Sorry, the explanation is almost longer than the patch :-)
>>
>> Is it OK for trunk?
>> (Just realizing that I forgot to document the new functions :( )
>>
>
> Here is an updated version with a bit of documentation for the new
> effective target.
> arm_neon_ok_no_float_abi now only tries to add -mfpu=neon, not ""
> since we always
> add -mfpu=neon in the lto tests anyway.
>
> OK?
>
>
>> Thanks,
>>
>> Christophe


Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Richard Biener
On June 16, 2017 3:31:32 PM GMT+02:00, "Bin.Cheng"  
wrote:
>On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
> wrote:
>> On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng 
>wrote:
>>> On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
>>>  wrote:
 On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng 
>wrote:
> Hi,
> Loop split forces intermediate computation to gimple operands all
>the time when
> computing bound information.  This is not good since folding
>opportunities are
> missed.  This patch fixes the issue by feeding all computation to
>folder and only
> forcing to gimple operand at last.
>
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

 Hm?  It uses gimple_build () which should do the same as
>fold_buildN in terms
 of simplification.

 So where does that not work?  It is supposed to be the prefered way
>and no
 new code should use force_gimple_operand (unless dealing with
>generic
 coming from other middle-end infrastructure like SCEV or niter
>analysis)
>>> Hmm, current code calls force_gimpele operand several times which
>>> causes the inefficiency.  The patch avoids that and does one call at
>>> the end.
>>
>> But it forces to the same sequence that is used for extending the
>expression
>> so folding should work.  Where do you see that it does not?  Note the
>> code uses gimple_build (), not gimple_build_assign ().
>In spec2k6/hmmer, when building fast_algorithms.c with below command
>line:
>./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
>-fdump-tree-lsplit
>The lsplit dump contains:
>   [12.75%]:
>  _124 = _197 + 1;
>  _123 = _124 + -1;
>  _115 = MIN_EXPR <_197, _124>;
>Which is generated here.

That means we miss a pattern in match.pd to handle this case.

Richard.

>Thanks,
>bin
>>
>> Richard.
>>
>>> Thanks,
>>> bin

 Richard.

>
> Thanks,
> bin
> 2017-06-12  Bin Cheng  
>
> * tree-ssa-loop-split.c (compute_new_first_bound): Feed
>bound
> computation to folder, rather than force to gimple
>operands too
> early.



Re: [PATCH 2/2] DWARF: make it possible to emit debug info for declarations only

2017-06-16 Thread Pierre-Marie de Rodat

On 05/31/2017 11:08 AM, Pierre-Marie de Rodat wrote:

On 05/31/2017 09:34 AM, Richard Biener wrote:

Actually for the bigger picture I'd refactor
rest_of_decl_compilation, not calling it from the frontends but
rely on finalize_decl/function.  The missing part would then be
calling the dwarf hook which should eventually be done at some of
the places the frontends now call rest_of_decl_compilation.


I put some thought about this, but I suppose I don’t yet understand well 
enough the relation between what rest_of_decl_compilation and 
finalize_decl/function do. So I’ve tried to go half-way: I moved the 
“specification?” guard from the DWARF back-end to callers of the 
early_global_decl hook. In the end, this yielded a very small middle-end 
change: almost all hook calls in front-ends are for variables or 
namespaces, not for functions.



But for an easier way (you might still explore the above ;)) just remove
the guards from dwarf2out.c and handle it more like types that we
prune if they end up being unused (OTOH I guess we don't refer to
the decl DIEs from "calls" because not all calls are refered to with
standard DWARF -- the GNU callsite stuff refers them I think but those
get generated too late).

That said, when early_finish is called the cgraph and IPA references
exists and thus you can
sort-of see which functions are "used".


Ok, thanks. I’ll give a try to the first option, then. :-)


I finally decided not to implement this scheme, as it does not give the 
same results for the case in Ada that motivated this change: it would 
generate potentially one DIE per “calling unit” per called function, 
which is quite suboptimal compared to one DIE per subprogram definition 
or subprogram import. This would look like a debug info bloat for 
debatable gain.


So here’s an updated patch, without the new debug hook. It bootstrapped 
and regtested fine on x86_64-linux. After this change, I observed the 
following size changes:


  * an increase of ~22KB for gnat1 (base is 210MB);
  * a decrease (?) of ~3KB for cc1 (base is 197MB);
  * a similar decrease of 3KB for cc1plus (base is 220MB).

Ok to commit?

--
Pierre-Marie de Rodat
>From 107f45b13334b5223004a3fb3cf251fd32c35055 Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat 
Date: Tue, 6 Jun 2017 10:52:51 +0200
Subject: [PATCH] DWARF: make it possible to emit debug info for declarations
 only

The DWARF back-end used to systematically ignore file-scope function and
variable declarations.  While this is justified in language like C/C++,
where such declarations can appear in several translation units and thus
bloat uselessly the debug info, this behavior is counter-productive in
languages with a well-defined module system.  Specifically, it prevents
the description of imported entities, that belong to foreign languages,
making them unavailable from debuggers.

Take for instance:

package C_Binding is
function My_C_Function (I : Integer) return Integer;
pragma Import (C, My_C_Function, "my_c_function");
end C_Binding;

This makes available for Ada programs the C function "my_c_function"
under the following name: C_Binding.My_C_Function.  When GCC compiles
it, though, it is represented as a FUNCTION_DECL node with DECL_EXTERNAL
set and a null DECL_INITIAL, which used to be discarded unconditionally
in the DWARF back-end.

This patch moves such filter from the DWARF back-end to the relevant
callers: passes.c:rest_of_decl_compilation and
godump.c:go_early_global_decl.  This patch also updates the Ada
front-end to call debug hooks for functions such as in the above
example, so that we do generate debugging information for them.

gcc/
	* dwarf2out.c (gen_decl_die): Remove the guard to skip file-scope
	FUNCTION_DECL declarations.
	(dwarf2out_early_global_decl): Remove the guard to skip FUNCTION_DECL
	declarations.
	(dwarf2out_decl): Likewise.
	* godump.c (go_early_global_decl): Skip call to the real debug hook
	for FUNCTION_DECL declarations.
	* passes.c (rest_of_decl_compilation): Skip call to the
	early_global_decl debug hook for FUNCTION_DECL declarations, unless
	-fdump-go-spec is passed.

gcc/ada/
	* gcc-interface/ada-tree.h (DECL_FUNCTION_IS_DEF): Update copyright
	notice.  New macro.
	* gcc-interface/trans.c (Subprogram_Body_to_gnu): Tag the subprogram
	as a definition.
	(Compilation_Unit_to_gnu): Tag the elaboration procedure as a
	definition.
	* gcc-interface/decl.c (gnat_to_gnu_entity): Tag declarations of
	imported subprograms for the current compilation unit as
	definitions.  Disable debug info for references to variables.
	* gcc-interface/gigi.h (create_subprog_decl): Update declaration.
	* gcc-interface/utils.c (gnat_pushdecl): Add external DECLs that are
	not built-in functions to their binding scope.
	(create_subprog_decl): Add a DEFINITION parameter.  If it is true, tag
	the function as a definition.  Update all callers.
	(gnat_write_global_declarations): Emit debug info for imported
	functions.  Filter out 

Re: [PATCH] [PR79542][Ada] Fix ICE in dwarf2out.c with nested func. inlining

2017-06-16 Thread Pierre-Marie de Rodat

On 05/26/2017 04:12 PM, Pierre-Marie de Rodat wrote:
I tried this, but I got a crash when compiling the Ada runtime 
(g-awk.adb). I could not extract a reproducer, but the idea is that 
because of the call to set_decl_origin_self, some DECLs have themselves 
as DECL_ABSTRACT_ORIGIN. As a result, my patch in modified_type_die does 
not prevent execution from calling gen_typedef_die with a DECL that has 
a non-null abstract origin. So I have to soften the assertion so that 
this specific case was still allowed in gen_typedef_die.


So here’s the updated patch: bootstrapped and regtested fine on 
x86_64-linux.


Ping for the updated patch, originally submitted at 
https://gcc.gnu.org/ml/gcc-patches/2017-05/msg02049.html


Thanks!

--
Pierre-Marie de Rodat


Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Marc Glisse

On Fri, 16 Jun 2017, Bin.Cheng wrote:


On Fri, Jun 16, 2017 at 5:16 PM, Richard Biener
 wrote:

On June 16, 2017 3:31:32 PM GMT+02:00, "Bin.Cheng"  
wrote:

On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
 wrote:

On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng 

wrote:

On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
 wrote:

On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng 

wrote:

Hi,
Loop split forces intermediate computation to gimple operands all

the time when

computing bound information.  This is not good since folding

opportunities are

missed.  This patch fixes the issue by feeding all computation to

folder and only

forcing to gimple operand at last.

Bootstrap and test on x86_64 and AArch64.  Is it OK?


Hm?  It uses gimple_build () which should do the same as

fold_buildN in terms

of simplification.

So where does that not work?  It is supposed to be the prefered way

and no

new code should use force_gimple_operand (unless dealing with

generic

coming from other middle-end infrastructure like SCEV or niter

analysis)

Hmm, current code calls force_gimpele operand several times which
causes the inefficiency.  The patch avoids that and does one call at
the end.


But it forces to the same sequence that is used for extending the

expression

so folding should work.  Where do you see that it does not?  Note the
code uses gimple_build (), not gimple_build_assign ().

In spec2k6/hmmer, when building fast_algorithms.c with below command
line:
./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
-fdump-tree-lsplit
The lsplit dump contains:
  [12.75%]:
 _124 = _197 + 1;
 _123 = _124 + -1;
 _115 = MIN_EXPR <_197, _124>;
Which is generated here.


That means we miss a pattern in match.PD to handle this case.

I see.  I will withdraw this patch and look in that direction.


For _123, we have

  /* (A +- CST1) +- CST2 -> A + CST3
or
/* Associate (p +p off1) +p off2 as (p +p (off1 + off2)).  */


For _115, we have

/* min (a, a + CST) -> a where CST is positive.  */
/* min (a, a + CST) -> a + CST where CST is negative. */
(simplify
 (min:c @0 (plus@2 @0 INTEGER_CST@1))
  (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
   (if (tree_int_cst_sgn (@1) > 0)
@0
@2)))

What is the type of all those SSA_NAMEs?

--
Marc Glisse


Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Bin.Cheng
On Fri, Jun 16, 2017 at 5:48 PM, Marc Glisse  wrote:
> On Fri, 16 Jun 2017, Bin.Cheng wrote:
>
>> On Fri, Jun 16, 2017 at 5:16 PM, Richard Biener
>>  wrote:
>>>
>>> On June 16, 2017 3:31:32 PM GMT+02:00, "Bin.Cheng"
>>>  wrote:

 On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
  wrote:
>
> On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng 

 wrote:
>>
>> On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
>>  wrote:
>>>
>>> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng 

 wrote:

 Hi,
 Loop split forces intermediate computation to gimple operands all

 the time when

 computing bound information.  This is not good since folding

 opportunities are

 missed.  This patch fixes the issue by feeding all computation to

 folder and only

 forcing to gimple operand at last.

 Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>>
>>> Hm?  It uses gimple_build () which should do the same as

 fold_buildN in terms
>>>
>>> of simplification.
>>>
>>> So where does that not work?  It is supposed to be the prefered way

 and no
>>>
>>> new code should use force_gimple_operand (unless dealing with

 generic
>>>
>>> coming from other middle-end infrastructure like SCEV or niter

 analysis)
>>
>> Hmm, current code calls force_gimpele operand several times which
>> causes the inefficiency.  The patch avoids that and does one call at
>> the end.
>
>
> But it forces to the same sequence that is used for extending the

 expression
>
> so folding should work.  Where do you see that it does not?  Note the
> code uses gimple_build (), not gimple_build_assign ().

 In spec2k6/hmmer, when building fast_algorithms.c with below command
 line:
 ./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
 -fdump-tree-lsplit
 The lsplit dump contains:
   [12.75%]:
  _124 = _197 + 1;
  _123 = _124 + -1;
  _115 = MIN_EXPR <_197, _124>;
 Which is generated here.
>>>
>>>
>>> That means we miss a pattern in match.PD to handle this case.
>>
>> I see.  I will withdraw this patch and look in that direction.
>
>
> For _123, we have
>
>   /* (A +- CST1) +- CST2 -> A + CST3
> or
> /* Associate (p +p off1) +p off2 as (p +p (off1 + off2)).  */
>
>
> For _115, we have
>
> /* min (a, a + CST) -> a where CST is positive.  */
> /* min (a, a + CST) -> a + CST where CST is negative. */
> (simplify
>  (min:c @0 (plus@2 @0 INTEGER_CST@1))
>   (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
>(if (tree_int_cst_sgn (@1) > 0)
> @0
> @2)))
>
> What is the type of all those SSA_NAMEs?
Hi Marc,
Thanks for pointing out the exact patterns.  The variables are of int
type.  The redundant operation disappears in reduced test case though.

Thanks,
bin
>
> --
> Marc Glisse


Re: [PATCH] fix PR ada/80888

2017-06-16 Thread Simon Wright
Ping
If OK, can it be applied please?
(patch applies cleanly to current sources)

> On 27 May 2017, at 16:58, Simon Wright  wrote:
> 
> The GNAT reference manual says in 11.6 Wide_Text_IO
> ,
> 
>  "The default encoding method for the standard files, and for opened
>  files for which no WCEM parameter is given in the FORM string matches
>  the wide character encoding specified for the main program (the
>  default being brackets encoding if no coding method was specified with
>  -gnatW)."
> 
> This is not true; the default is brackets encoding regardless of the
> coding method specified with -gnatW.
> 
> The attached patch (to 7.1.0) corrects this. Tested on
> x86_64-apple-darwin15 by rebuilding the library (cd gcc; make gnatlib
> gnatlib-shared) and make -j4 check-ada,
> 
>   === acats Summary ===
> # of expected passes  2320
> # of unexpected failures  0
> /Volumes/Miscellaneous/tmp/gcc-7.1.0/gcc/testsuite/ada/acats/run_all.sh 
> completed at Fri 26 May 2017 15:44:52 BST
> 
>   === gnat Summary ===
> 
> # of expected passes  2569
> # of expected failures24
> # of unsupported tests7
> /Volumes/Miscellaneous/tmp/gcc-7.1.0-build/gcc/gnatmake version 7.1.0
> 
> gcc/ada/Changelog:
> 
>   2017-05-27 Simon Wright 
> 
>   PR ada/80888
>   * a-textio.adb (Set_WCEM): default the file's wide character encoding
> method to Default_WCEM, not WCEM_Brackets.
>   * a-witeio.adb: likewise.
>   * a-ztexio.adb: likewise.
> 
> 



Re: [PATCH GCC][1/2]Feed bound computation to folder in loop split

2017-06-16 Thread Andrew Pinski
On Fri, Jun 16, 2017 at 9:48 AM, Marc Glisse  wrote:
> On Fri, 16 Jun 2017, Bin.Cheng wrote:
>
>> On Fri, Jun 16, 2017 at 5:16 PM, Richard Biener
>>  wrote:
>>>
>>> On June 16, 2017 3:31:32 PM GMT+02:00, "Bin.Cheng"
>>>  wrote:

 On Fri, Jun 16, 2017 at 2:10 PM, Richard Biener
  wrote:
>
> On Fri, Jun 16, 2017 at 3:06 PM, Bin.Cheng 

 wrote:
>>
>> On Fri, Jun 16, 2017 at 11:49 AM, Richard Biener
>>  wrote:
>>>
>>> On Wed, Jun 14, 2017 at 3:07 PM, Bin Cheng 

 wrote:

 Hi,
 Loop split forces intermediate computation to gimple operands all

 the time when

 computing bound information.  This is not good since folding

 opportunities are

 missed.  This patch fixes the issue by feeding all computation to

 folder and only

 forcing to gimple operand at last.

 Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>>
>>> Hm?  It uses gimple_build () which should do the same as

 fold_buildN in terms
>>>
>>> of simplification.
>>>
>>> So where does that not work?  It is supposed to be the prefered way

 and no
>>>
>>> new code should use force_gimple_operand (unless dealing with

 generic
>>>
>>> coming from other middle-end infrastructure like SCEV or niter

 analysis)
>>
>> Hmm, current code calls force_gimpele operand several times which
>> causes the inefficiency.  The patch avoids that and does one call at
>> the end.
>
>
> But it forces to the same sequence that is used for extending the

 expression
>
> so folding should work.  Where do you see that it does not?  Note the
> code uses gimple_build (), not gimple_build_assign ().

 In spec2k6/hmmer, when building fast_algorithms.c with below command
 line:
 ./gcc -Ofast -S fast_algorithms.c -o fast_algorithms.S -fdump-tree-all
 -fdump-tree-lsplit
 The lsplit dump contains:
   [12.75%]:
  _124 = _197 + 1;
  _123 = _124 + -1;
  _115 = MIN_EXPR <_197, _124>;
 Which is generated here.
>>>
>>>
>>> That means we miss a pattern in match.PD to handle this case.
>>
>> I see.  I will withdraw this patch and look in that direction.
>
>
> For _123, we have
>
>   /* (A +- CST1) +- CST2 -> A + CST3
> or
> /* Associate (p +p off1) +p off2 as (p +p (off1 + off2)).  */
>
>
> For _115, we have
>
> /* min (a, a + CST) -> a where CST is positive.  */
> /* min (a, a + CST) -> a + CST where CST is negative. */
> (simplify
>  (min:c @0 (plus@2 @0 INTEGER_CST@1))
>   (if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
>(if (tree_int_cst_sgn (@1) > 0)
> @0
> @2)))
>
> What is the type of all those SSA_NAMEs?

https://gcc.gnu.org/ml/gcc-patches/2016-12/msg01352.html
which added the min/max patterns.  I forgot to get Naveen to mention I
saw this while looking into loop splitting and why I was adding them.

Thanks,
Andrew Pinski

>
> --
> Marc Glisse


Re: [PATCH/AARCH64] Improve aarch64 conditional compare usage

2017-06-16 Thread Steve Ellcey

https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00021.html

Ping.

Steve Ellcey
sell...@cavium.com


Make edge profiling slightly faster

2017-06-16 Thread Jan Hubicka
Hi,
edge profiling builds a spanning tree and then instruments all remaining
edges of the CFG.  With branch prediction we could pick for the spanning tree
those edges that are expected to execute more often, saving some of the
-fprofile-generate overhead (about 3% on tramp3d but likely more on testcases
with more complicated control flow).

Bootstrapped/regtested x86_64-linux, committed.

Honza

* profile.c (compare_freqs): New function.
(branch_prob): Sort edge list.
(find_spanning_tree): Assume that the list is priority sorted.

* gcc.dg/tree-ssa/ssa-lim-11.c: Disable branch prediction.

Index: profile.c
===
--- profile.c   (revision 249223)
+++ profile.c   (working copy)
@@ -987,6 +987,27 @@ output_location (char const *file_name,
 }
 }
 
+/* Helper for qsort so edges get sorted from highest frequency to smallest.
+   This controls the weight for minimal spanning tree algorithm  */
+static int
+compare_freqs (const void *p1, const void *p2)
+{
+  const_edge e1 = *(const const_edge *)p1;
+  const_edge e2 = *(const const_edge *)p2;
+
+  /* Critical edges needs to be split which introduce extra control flow.
+ Make them more heavy.  */
+  int m1 = EDGE_CRITICAL_P (e1) ? 2 : 1;
+  int m2 = EDGE_CRITICAL_P (e2) ? 2 : 1;
+
+  if (EDGE_FREQUENCY (e1) * m1 + m1 != EDGE_FREQUENCY (e2) * m2 + m2)
+return EDGE_FREQUENCY (e2) * m2 + m2 - EDGE_FREQUENCY (e1) * m1 - m1;
+  /* Stabilize sort.  */
+  if (e1->src->index != e2->src->index)
+return e2->src->index - e1->src->index;
+  return e2->dest->index - e1->dest->index;
+}
+
 /* Instrument and/or analyze program behavior based on program the CFG.
 
This function creates a representation of the control flow graph (of
@@ -1140,6 +1161,7 @@ branch_prob (void)
 
   el = create_edge_list ();
   num_edges = NUM_EDGES (el);
+  qsort (el->index_to_edge, num_edges, sizeof (edge), compare_freqs);
   alloc_aux_for_edges (sizeof (struct edge_profile_info));
 
   /* The basic blocks are expected to be numbered sequentially.  */
@@ -1431,22 +1453,8 @@ find_spanning_tree (struct edge_list *el
}
 }
 
-  /* Now insert all critical edges to the tree unless they form a cycle.  */
-  for (i = 0; i < num_edges; i++)
-{
-  edge e = INDEX_EDGE (el, i);
-  if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
- && find_group (e->src) != find_group (e->dest))
-   {
- if (dump_file)
-   fprintf (dump_file, "Critical edge %d to %d put to tree\n",
-e->src->index, e->dest->index);
- EDGE_INFO (e)->on_tree = 1;
- union_groups (e->src, e->dest);
-   }
-}
-
-  /* And now the rest.  */
+  /* And now the rest.  Edge list is sorted according to frequencies and
+ thus we will produce minimal spanning tree.  */
   for (i = 0; i < num_edges; i++)
 {
   edge e = INDEX_EDGE (el, i);


Re: [PATCH GCC][12/13]Workaround reduction statements for distribution

2017-06-16 Thread Bin.Cheng
On Fri, Jun 16, 2017 at 11:21 AM, Richard Biener
 wrote:
> On Mon, Jun 12, 2017 at 7:03 PM, Bin Cheng  wrote:
>> Hi,
>> For now, loop distribution handles variables used outside of loop as 
>> reduction.
>> This is inaccurate because all partitions contain statement defining 
>> induction
>> vars.
>
> But final induction values are usually not used outside of the loop...
This is actually for an induction variable that is used outside of the loop.
>
> What is missing is loop distribution trying to change partition order.  In 
> fact
> we somehow assume we can move a reduction across a detected builtin
> (I don't remember if we ever check for validity of that...).
Hmm, I am not sure when we can't.  If there is any dependence between
builtin/reduction partitions, it should be captured by RDG or PG,
otherwise the partitions are independent and can be freely ordered as
long as reduction partition is scheduled last?
>
>> Ideally we should factor out scev-propagation as a standalone interface
>> which can be called when necessary.  Before that, this patch simply 
>> workarounds
>> reduction issue by checking if the statement belongs to all partitions.  If 
>> yes,
>> the reduction must be computed in the last partition no matter how the loop 
>> is
>> distributed.
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>
> stmt_in_all_partitions is not kept up-to-date during partition merging and if
> merging makes the reduction partition(s) pass the stmt_in_all_partitions
> test your simple workaround doesn't work ...
I think it doesn't matter because:
  A) it's really a workaround for induction variables.  In general,
induction variables are included in all partitions.
  B) After classifying partitions, we immediately fuse all reduction
partitions.  More stmt_in_all_partitions means we are fusing a
non-reduction partition with a reduction partition, so the newly
generated (stmt_in_all_partitions) are actually not reduction
statements.  The workaround won't work anyway even if the bitmap is
maintained.
>
> As written it's a valid optimization but can you please note it's limitation 
> in
> some comment please?
Yeah, I will add comment explaining it.

Thanks,
bin
>
> Also...
>
> +  bitmap_set_range (stmt_in_all_partitions, 0, rdg->n_vertices);
> +  rdg_build_partitions (rdg, stmts, &partitions, stmt_in_all_partitions);
>
> ick.  Please instead do
>
>bitmap_copy (smtt_in_all_partitions, partitions[0]->stmts);
>for (i = 1; i < ...)
>  bitmap_and_into (stmt_in_all_partitons, partitions[i]->stmts);
>
> Thanks,
> Richard.
>
>> Thanks,
>> bin
>> 2017-06-07  Bin Cheng  
>>
>> * tree-loop-distribution.c (classify_partition): New parameter and
>> better handle reduction statement.
>> (rdg_build_partitions): New parameter and record statements belonging
>> to all partitions.
>> (distribute_loop): Update use of above functions.


Re: [patch, libfortran] Speed up cshift for dim > 1

2017-06-16 Thread Jerry DeLisle
On 06/14/2017 12:41 PM, Thomas Koenig wrote:
> Hello world,
> 
> the attached patch implements a blocked algorithm for
> improving the speed of cshift for dim > 1.
> 
> It uses the fact that
> 
>   integer, dimension (n1,n2,n3) :: a, b
> 
>   b = cshift(a,shift,3)
> 
> is identical, as far as the memory locations is concerned.
> 
>   integer, dimension (n1*n2*n3) :: c, d
>   d = cshift(c, shift*n1*n2, 1)
> 
> The speedup is quite large; from being really slow for
> dim > 1, this patch makes it go even faster.
> 
> Below there are some comparisons for the attached benchmark,
> do-1.f90. gfortran-7 uses the old library version.
> 
> Interestingly, the library version is also much faster
> than an implementation of straight DO loops.
> 
> Regression-tested.  OK for trunk?
> 

OK for trunk.

Thanks,

Jerry