[PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

I noticed that with the trunk compiler a range of the new rtl
inlines show up as hot in a profiler during stage1. I think
that happens because stage1 is not using optimization
and does not inline plain inline.  And these rtl inlines
are very frequently called.

Mark them all with __attribute__((always_inline)) which forces
inlining even with -O0.

Passes bootstrap and testing on x86_64-linux.

Cc: dmalc...@redhat.com

include/:

2014-09-01  Andi Kleen  a...@linux.intel.com

* ansidecl.h (ALWAYS_INLINE): Add.

gcc/:

2014-09-01  Andi Kleen  a...@linux.intel.com

* rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
(rhs_regno): Dito.
(init_costs_to_max): Dito.
(init_costs_to_zero): Dito.
(costs_lt_p): Dito.
(costs_add_n_insns): Dito.
(wi::int_traits ::get_precision): Dito.
(wi::shwi): Dito.
(wi::min_value): Dito.
(wi::max_value): Dito.
(set_rtx_cost): Dito.
(get_full_set_rtx_cost): Dito.
(set_src_cost): Dito.
(get_full_set_src_cost): Dito.
(get_mem_attrs): Dito.
---
 gcc/rtl.h  | 111 +++--
 include/ansidecl.h |   6 +++
 2 files changed, 62 insertions(+), 55 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index beeed2f..d711e43 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_RTL_H
 
 #include utility
+#include ansidecl.h
 #include statistics.h
 #include machmode.h
 #include input.h
@@ -418,7 +419,7 @@ public:
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_expr_list *::test (rtx rt)
 {
   return rt-code == EXPR_LIST;
@@ -447,7 +448,7 @@ public:
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_insn_list *::test (rtx rt)
 {
   return rt-code == INSN_LIST;
@@ -474,7 +475,7 @@ public:
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_sequence *::test (rtx rt)
 {
   return rt-code == SEQUENCE;
@@ -482,7 +483,7 @@ is_a_helper rtx_sequence *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper const rtx_sequence *::test (const_rtx rt)
 {
   return rt-code == SEQUENCE;
@@ -778,7 +779,7 @@ struct GTY(()) rtvec_def {
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_insn *::test (rtx rt)
 {
   return (INSN_P (rt)
@@ -790,7 +791,7 @@ is_a_helper rtx_insn *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper const rtx_insn *::test (const_rtx rt)
 {
   return (INSN_P (rt)
@@ -802,7 +803,7 @@ is_a_helper const rtx_insn *::test (const_rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_debug_insn *::test (rtx rt)
 {
   return DEBUG_INSN_P (rt);
@@ -810,7 +811,7 @@ is_a_helper rtx_debug_insn *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_nonjump_insn *::test (rtx rt)
 {
   return NONJUMP_INSN_P (rt);
@@ -818,7 +819,7 @@ is_a_helper rtx_nonjump_insn *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_jump_insn *::test (rtx rt)
 {
   return JUMP_P (rt);
@@ -826,7 +827,7 @@ is_a_helper rtx_jump_insn *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_call_insn *::test (rtx rt)
 {
   return CALL_P (rt);
@@ -834,7 +835,7 @@ is_a_helper rtx_call_insn *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_call_insn *::test (rtx_insn *insn)
 {
   return CALL_P (insn);
@@ -842,7 +843,7 @@ is_a_helper rtx_call_insn *::test (rtx_insn *insn)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_jump_table_data *::test (rtx rt)
 {
   return JUMP_TABLE_DATA_P (rt);
@@ -850,7 +851,7 @@ is_a_helper rtx_jump_table_data *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_jump_table_data *::test (rtx_insn *insn)
 {
   return JUMP_TABLE_DATA_P (insn);
@@ -858,7 +859,7 @@ is_a_helper rtx_jump_table_data *::test (rtx_insn *insn)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_barrier *::test (rtx rt)
 {
   return BARRIER_P (rt);
@@ -866,7 +867,7 @@ is_a_helper rtx_barrier *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_code_label *::test (rtx rt)
 {
   return LABEL_P (rt);
@@ -874,7 +875,7 @@ is_a_helper rtx_code_label *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_code_label *::test (rtx_insn *insn)
 {
   return LABEL_P (insn);
@@ -882,7 +883,7 @@ is_a_helper rtx_code_label *::test (rtx_insn *insn)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 is_a_helper rtx_note *::test (rtx rt)
 {
   return NOTE_P (rt);
@@ -890,7 +891,7 @@ is_a_helper rtx_note *::test (rtx rt)
 
 template 
 template 
-inline bool
+ALWAYS_INLINE bool
 

Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andrew Pinski
On Tue, Sep 2, 2014 at 12:03 AM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 I noticed that with the trunk compiler a range of the new rtl
 inlines show up as hot in a profiler during stage1. I think
 that happens because stage1 is not using optimization
 and does not inline plain inline.  And these rtl inlines
 are very frequently called.

 Mark them all with __attribute__((always_inline)) which forces
 inlining even with -O0.


I think this is wrong and should not be committed.  stage1 is designed
to be without optimization and there have been bugs in the past in the
area of always_inline too.

Thanks,
Andrew Pinski


 Passes bootstrap and testing on x86_64-linux.

 Cc: dmalc...@redhat.com

 include/:

 2014-09-01  Andi Kleen  a...@linux.intel.com

 * ansidecl.h (ALWAYS_INLINE): Add.

 gcc/:

 2014-09-01  Andi Kleen  a...@linux.intel.com

 * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE.
 (rhs_regno): Dito.
 (init_costs_to_max): Dito.
 (init_costs_to_zero): Dito.
 (costs_lt_p): Dito.
 (costs_add_n_insns): Dito.
 (wi::int_traits ::get_precision): Dito.
 (wi::shwi): Dito.
 (wi::min_value): Dito.
 (wi::max_value): Dito.
 (set_rtx_cost): Dito.
 (get_full_set_rtx_cost): Dito.
 (set_src_cost): Dito.
 (get_full_set_src_cost): Dito.
 (get_mem_attrs): Dito.
 ---
  gcc/rtl.h  | 111 
 +++--
  include/ansidecl.h |   6 +++
  2 files changed, 62 insertions(+), 55 deletions(-)

 diff --git a/gcc/rtl.h b/gcc/rtl.h
 index beeed2f..d711e43 100644
 --- a/gcc/rtl.h
 +++ b/gcc/rtl.h
 @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
  #define GCC_RTL_H

  #include utility
 +#include ansidecl.h
  #include statistics.h
  #include machmode.h
  #include input.h
 @@ -418,7 +419,7 @@ public:

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_expr_list *::test (rtx rt)
  {
return rt-code == EXPR_LIST;
 @@ -447,7 +448,7 @@ public:

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_insn_list *::test (rtx rt)
  {
return rt-code == INSN_LIST;
 @@ -474,7 +475,7 @@ public:

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_sequence *::test (rtx rt)
  {
return rt-code == SEQUENCE;
 @@ -482,7 +483,7 @@ is_a_helper rtx_sequence *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper const rtx_sequence *::test (const_rtx rt)
  {
return rt-code == SEQUENCE;
 @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def {

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_insn *::test (rtx rt)
  {
return (INSN_P (rt)
 @@ -790,7 +791,7 @@ is_a_helper rtx_insn *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper const rtx_insn *::test (const_rtx rt)
  {
return (INSN_P (rt)
 @@ -802,7 +803,7 @@ is_a_helper const rtx_insn *::test (const_rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_debug_insn *::test (rtx rt)
  {
return DEBUG_INSN_P (rt);
 @@ -810,7 +811,7 @@ is_a_helper rtx_debug_insn *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_nonjump_insn *::test (rtx rt)
  {
return NONJUMP_INSN_P (rt);
 @@ -818,7 +819,7 @@ is_a_helper rtx_nonjump_insn *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_jump_insn *::test (rtx rt)
  {
return JUMP_P (rt);
 @@ -826,7 +827,7 @@ is_a_helper rtx_jump_insn *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_call_insn *::test (rtx rt)
  {
return CALL_P (rt);
 @@ -834,7 +835,7 @@ is_a_helper rtx_call_insn *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_call_insn *::test (rtx_insn *insn)
  {
return CALL_P (insn);
 @@ -842,7 +843,7 @@ is_a_helper rtx_call_insn *::test (rtx_insn *insn)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_jump_table_data *::test (rtx rt)
  {
return JUMP_TABLE_DATA_P (rt);
 @@ -850,7 +851,7 @@ is_a_helper rtx_jump_table_data *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_jump_table_data *::test (rtx_insn *insn)
  {
return JUMP_TABLE_DATA_P (insn);
 @@ -858,7 +859,7 @@ is_a_helper rtx_jump_table_data *::test (rtx_insn *insn)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_barrier *::test (rtx rt)
  {
return BARRIER_P (rt);
 @@ -866,7 +867,7 @@ is_a_helper rtx_barrier *::test (rtx rt)

  template 
  template 
 -inline bool
 +ALWAYS_INLINE bool
  is_a_helper rtx_code_label *::test (rtx rt)
  {
return LABEL_P (rt);
 @@ -874,7 +875,7 @@ is_a_helper rtx_code_label *::test (rtx rt)

  template 
  template 
 -inline 

Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andi Kleen

 there have been bugs in the past in the area of always_inline too.

You're arguing for my patch. It would find those bugs.

-Andi


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andrew Pinski
On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen a...@firstfloor.org wrote:

 there have been bugs in the past in the area of always_inline too.

 You're arguing for my patch. It would find those bugs.


No, I am arguing against it, since we cannot change the older versions of GCC.

Thanks,
Andrew


 -Andi


Re: [PATCH] gcc-ar: Turn plugin not found case into a warning

2014-09-02 Thread Richard Biener
On Mon, Sep 1, 2014 at 6:33 PM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 Only give a warning when gcc-ar/nm/ranlib cannot find the plugin.
 In this case do not pass a plugin argument to the wrapped program.

 This should make it work on non linker plugin systems, so
 that the build system can use it unconditionally.

Hmm, maybe conditionalize the error on HAVE_LTO_PLUGIN == 2
instead?  And not warn for HAVE_LTO_PLUGIN == 0 at all?

Richard.

 gcc/:

 2014-09-01  Andi Kleen  a...@linux.intel.com

 * gcc-ar (main): Only warn when plugin not found.
 ---
  gcc/gcc-ar.c | 27 ---
  1 file changed, 16 insertions(+), 11 deletions(-)

 diff --git a/gcc/gcc-ar.c b/gcc/gcc-ar.c
 index fdff89c..e27ea3b 100644
 --- a/gcc/gcc-ar.c
 +++ b/gcc/gcc-ar.c
 @@ -182,8 +182,8 @@ main (int ac, char **av)
plugin = find_a_file (target_path, LTOPLUGINSONAME, R_OK);
if (!plugin)
  {
 -  fprintf (stderr, %s: Cannot find plugin '%s'\n, av[0], 
 LTOPLUGINSONAME);
 -  exit (1);
 +  fprintf (stderr, %s: Warning: Cannot find plugin '%s'\n, av[0], 
 LTOPLUGINSONAME);
 +  /* Fall back to not using a plugin.  */
  }

/* Find the wrapped binutils program.  */
 @@ -204,15 +204,20 @@ main (int ac, char **av)
  }

/* Create new command line with plugin */
 -  nargv = XCNEWVEC (const char *, ac + 4);
 -  nargv[0] = exe_name;
 -  nargv[1] = --plugin;
 -  nargv[2] = plugin;
 -  if (is_ar  av[1]  av[1][0] != '-')
 -av[1] = concat (-, av[1], NULL);
 -  for (k = 1; k  ac; k++)
 -nargv[2 + k] = av[k];
 -  nargv[2 + k] = NULL;
 +  if (plugin != NULL)
 +{
 +  nargv = XCNEWVEC (const char *, ac + 4);
 +  nargv[0] = exe_name;
 +  nargv[1] = --plugin;
 +  nargv[2] = plugin;
 +  if (is_ar  av[1]  av[1][0] != '-')
 +av[1] = concat (-, av[1], NULL);
 +  for (k = 1; k  ac; k++)
 +nargv[2 + k] = av[k];
 +  nargv[2 + k] = NULL;
 +}
 +  else
 +nargv = CONST_CAST2 (const char **, char **, av);

/* Run utility */
/* ??? the const is misplaced in pex_one's argv? */
 --
 2.1.0



[PATCH] Fix PR62695

2014-09-02 Thread Richard Biener

The auto_vec replacement missed one truncation.

Committed as obvious.

Richard.

2014-09-02  Richard Biener  rguent...@suse.de

PR tree-optimization/62695
* tree-ssa-structalias.c (find_func_clobbers): Add missing
vector truncate.

* gfortran.dg/pr62695.f90: New testcase.

Index: gcc/tree-ssa-structalias.c
===
--- gcc/tree-ssa-structalias.c  (revision 214795)
+++ gcc/tree-ssa-structalias.c  (working copy)
@@ -5042,6 +5042,7 @@ find_func_clobbers (struct function *fn,
  get_constraint_for_address_of (arg, rhsc);
  FOR_EACH_VEC_ELT (rhsc, j, rhsp)
process_constraint (new_constraint (lhs, *rhsp));
+ rhsc.truncate (0);
}
 
   /* Build constraints for propagating clobbers/uses along the
Index: gcc/testsuite/gfortran.dg/pr62695.f90
===
--- gcc/testsuite/gfortran.dg/pr62695.f90   (revision 0)
+++ gcc/testsuite/gfortran.dg/pr62695.f90   (working copy)
@@ -0,0 +1,27 @@
+! { dg-do compile }
+! { dg-options -O -fipa-pta }
+
+MODULE dbcsr_dist_operations
+  TYPE dbcsr_mp_obj
+  END TYPE dbcsr_mp_obj
+  INTERFACE
+SUBROUTINE dbcsr_mp_new(mp_env, pgrid, mp_group, mynode, numnodes, myprow,
+ mypcol)
+  IMPORT
+  TYPE(dbcsr_mp_obj), INTENT(OUT)  :: mp_env
+  INTEGER, DIMENSION(0:, 0:), INTENT(IN)   :: pgrid
+END SUBROUTINE dbcsr_mp_new
+  END INTERFACE
+CONTAINS
+  SUBROUTINE dbcsr_mp_make_env (mp_env, mp_group, 
+   nprocs, pgrid_dims, error)
+TYPE(dbcsr_mp_obj), INTENT(OUT)  :: mp_env
+  OPTIONAL   :: pgrid_dims
+INTEGER  :: error_handle, group, mynode, 
+numnodes, pcol, prow
+INTEGER, ALLOCATABLE, DIMENSION(:, :):: pgrid
+INTEGER, DIMENSION(2):: coord, myploc, npdims
+CALL dbcsr_mp_new (mp_env, pgrid, group, mynode, numnodes,
+ myprow=myploc(1), mypcol=myploc(2))
+  END SUBROUTINE dbcsr_mp_make_env
+END MODULE dbcsr_dist_operations


Re: [PATCH] support ggc hash_map and hash_set

2014-09-02 Thread Richard Biener
On Tue, Sep 2, 2014 at 3:56 AM,  tsaund...@mozilla.com wrote:
 From: Trevor Saunders tsaund...@mozilla.com

 Hi,

 There are still some issues to make this work really nicely, but this part is
 probably good enough that it's worth reviewing.

 For one thing, you can't use ggc hash_map or set in front ends with some types,
 or gengtype will decide to put the overloads of the marking routines it
 provides in a front end file instead of the one it chose before, breaking
 other front ends.  However, that seems to be an unrelated issue; you can
 trigger it without using hash_map/set, so we might as well solve it separately.

 I had to have the entry marking functions for set delegate to the traits class
 because gcc  4.9.1 issues clearly bogus errors if you inline the code from 
 the
 traits implementation.  We may well want to make map work the same way at some
 point to enable some of the special GTY attributes like if_marked, but it
 doesn't seem to be necessary right now.

 bootstrapped + regtested without regressions on x86_64-unknown-linux-gnu, ok?

Ok if you make the gcc_assert()s in the marking routines gcc_checking_assert()s.

Btw - do manual markers need any special support for finalizers?
Does the hash table need any special support to make finalizers efficient
(avoid recording for each entry if stored in-place?)

Thanks,
Richard.

 Trev

 gcc/ChangeLog:

 2014-09-01  Trevor Saunders  tsaund...@mozilla.com

 * alloc-pool.c: Include coretypes.h.
 * cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c,
 function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and
 hash_set instead of htab.
 * ggc-page.c (in_gc): New variable.
 (ggc_free): Do nothing if a collection is taking place.
 (ggc_collect): Set in_gc appropriately.
 * ggc.h (gt_ggc_mx(const char *)): New function.
 (gt_pch_nx(const char *)): Likewise.
 (gt_ggc_mx(int)): Likewise.
 (gt_pch_nx(int)): Likewise.
 * hash-map.h (hash_map::hash_entry::ggc_mx): Likewise.
 (hash_map::hash_entry::pch_nx): Likewise.
 (hash_map::hash_entry::pch_nx_helper): Likewise.
 (hash_map::hash_map): Adjust.
 (hash_map::create_ggc): New function.
 (gt_ggc_mx): Likewise.
 (gt_pch_nx): Likewise.
 * hash-set.h (default_hashset_traits::ggc_mx): Likewise.
 (default_hashset_traits::pch_nx): Likewise.
 (hash_set::hash_entry::ggc_mx): Likewise.
 (hash_set::hash_entry::pch_nx): Likewise.
 (hash_set::hash_entry::pch_nx_helper): Likewise.
 (hash_set::hash_set): Adjust.
 (hash_set::create_ggc): New function.
 (hash_set::elements): Likewise.
 (gt_ggc_mx): Likewise.
 (gt_pch_nx): Likewise.
 * hash-table.h (hash_table::hash_table): Adjust.
 (hash_table::m_ggc): New member.
 (hash_table::~hash_table): Adjust.
 (hash_table::expand): Likewise.
 (hash_table::empty): Likewise.
 (gt_ggc_mx): New function.
 (hashtab_entry_note_pointers): Likewise.
 (gt_pch_nx): Likewise.


 diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c
 index 0d31835..bfaa0e4 100644
 --- a/gcc/alloc-pool.c
 +++ b/gcc/alloc-pool.c
 @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see

  #include config.h
  #include system.h
 +#include coretypes.h
  #include alloc-pool.h
  #include hash-table.h
  #include hash-map.h
 diff --git a/gcc/cgraph.h b/gcc/cgraph.h
 index 879899c..030a1c7 100644
 --- a/gcc/cgraph.h
 +++ b/gcc/cgraph.h
 @@ -1604,7 +1604,6 @@ struct cgraph_2node_hook_list;

  /* Map from a symbol to initialization/finalization priorities.  */
  struct GTY(()) symbol_priority_map {
 -  symtab_node *symbol;
priority_type init;
priority_type fini;
  };
 @@ -1872,7 +1871,7 @@ public:
htab_t GTY((param_is (symtab_node))) assembler_name_hash;

/* Hash table used to hold init priorities.  */
 -  htab_t GTY ((param_is (symbol_priority_map))) init_priority_hash;
 +  hash_mapsymtab_node *, symbol_priority_map *init_priority_hash;

FILE* GTY ((skip)) dump_file;

 diff --git a/gcc/dbxout.c b/gcc/dbxout.c
 index 946f1d1..d856bdd 100644
 --- a/gcc/dbxout.c
 +++ b/gcc/dbxout.c
 @@ -2484,12 +2484,9 @@ dbxout_expand_expr (tree expr)
  /* Helper function for output_used_types.  Queue one entry from the
 used types hash to be output.  */

 -static int
 -output_used_types_helper (void **slot, void *data)
 +bool
 +output_used_types_helper (tree const type, vectree *types_p)
  {
 -  tree type = (tree) *slot;
 -  vectree *types_p = (vectree *) data;
 -
if ((TREE_CODE (type) == RECORD_TYPE
 || TREE_CODE (type) == UNION_TYPE
 || TREE_CODE (type) == QUAL_UNION_TYPE
 @@ -2502,7 +2499,7 @@ output_used_types_helper (void **slot, void *data)
 TREE_CODE (TYPE_NAME (type)) == TYPE_DECL)
  types_p-quick_push (TYPE_NAME (type));

 -  return 1;
 +  return true;
  }

  /* This is a qsort callback which sorts types and declarations into a
 @@ -2544,8 +2541,9 @@ output_used_types (void)
int i;
tree 

Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Steven Bosscher
On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
 On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:

 there have been bugs in the past in the area of always_inline too.

 You're arguing for my patch. It would find those bugs.


 No I am arguing against it since the older versions of GCC we cannot change.

Should such bugs turn up, we can account for them in ansidecl.h.

I think Andi's patch should go in.

Ciao!
Steven


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread pinskia


 On Sep 2, 2014, at 1:36 AM, Steven Bosscher stevenb@gmail.com wrote:
 
 On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
 On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:
 
 there have been bugs in the past in the area of always_inline too.
 
 You're arguing for my patch. It would find those bugs.
 
 
 No I am arguing against it since the older versions of GCC we cannot change.
 
 Should such bugs turn up, we can account for them in ansidecl.h.
 
 I think Andi's patch should go in.

It does hurt debuggability with older compilers too. So if we need to figure out
why stage1 is being miscompiled, it is harder to figure out how to work around it.

I think stage1 should really be -O0 even with respect to inlining.  I think we
should never force inlining inside gcc even at -O0, as it is just a hack (we know
it, as we added the attribute in the first place).

Thanks,
Andrew

 
 Ciao!
 Steven


Re: [PINGv2][PATCH] Fix for PR 61875

2014-09-02 Thread Yury Gribov

On 09/01/2014 08:28 PM, Jakub Jelinek wrote:

This situation occurs when somebody decides to build GCC with
-fexceptions and -frtti which are forbidden for libsanitizer.

I don't see a reason for this, simply don't do that, libsanitizer AFAIK
isn't the only library where it is highly undesirable to have these flags in
CXXFLAGS.  libatomic and libgtm are other examples of libraries that
shouldn't be compiled with those flags.


Thanks, Jakub. Could someone close 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61875 as Invalid?


-Y


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Richard Biener
On Tue, Sep 2, 2014 at 10:36 AM, Steven Bosscher stevenb@gmail.com wrote:
 On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote:
 On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote:

 there have been bugs in the past in the area of always_inline too.

 You're arguing for my patch. It would find those bugs.


 No I am arguing against it since the older versions of GCC we cannot change.

 Should such bugs turn up, we can account for them in ansidecl.h.

 I think Andi's patch should go in.

I disagree.  always-inline isn't an optimization attribute but a correctness
one.

Instead we should not build stage1 with -O0 if we detect a reasonably
recent GCC host compiler (say one that is still maintained).  Or
we simply should make -finline work at -O0 (I suppose it might already
work?) and use it.

Richard.

 Ciao!
 Steven


Ping^2 - RE: [PATCH] Add target hook to override DWARF2 frame register size

2014-09-02 Thread Matthew Fortune
Ping^2

Added Jason as maintainer for dwarf related things.

This hook will be used in the following patch:
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02172.html

Thanks,
Matthew

 Ping.
 
 Thanks,
 Matthew
 
  Sent: 07 August 2014 07:21
   Please don't add target macros. Add a hook if you must, but we're
   supposed to remove target macros, not add new ones :-)
 
  Thanks Steven, I wasn't sure if there were still things that were
  acceptable as macros. There's a lot to get rid of still.
 
  Updated patch using a target hook. I've opted to move the logic
  which handles part clobbered registers into the default implementation
  as that seemed natural. I have no real preference if others feel that
  is the wrong thing to do. This will be used by an up-coming patch for
  MIPS O32 ABI extensions.
 
  Bootstrapped and regtested on x86_64-linux-gnu.
 
  Thanks,
  Matthew
 
  gcc/
  * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook.
  * targhooks.c (default_dwarf_frame_reg_mode): New function.
  * targhooks.h (default_dwarf_frame_reg_mode): New prototype.
  * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document.
  * doc/tm.texi: Regenerate.
  * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode
  selection logic to default_dwarf_frame_reg_mode.
  ---
   gcc/doc/tm.texi|  7 +++
   gcc/doc/tm.texi.in |  2 ++
   gcc/dwarf2cfi.c|  4 +---
   gcc/target.def | 11 +++
   gcc/targhooks.c| 13 +
   gcc/targhooks.h|  1 +
   6 files changed, 35 insertions(+), 3 deletions(-)
 
  diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
  index dd72b98..aa92ce4 100644
  --- a/gcc/doc/tm.texi.in
  +++ b/gcc/doc/tm.texi.in
  @@ -6604,6 +6604,8 @@ the target supports DWARF 2 frame unwind
  information.
 
   @hook TARGET_DWARF_REGISTER_SPAN
 
  +@hook TARGET_DWARF_FRAME_REG_MODE
  +
   @hook TARGET_INIT_DWARF_REG_SIZES_EXTRA
 
   @hook TARGET_ASM_TTYPE
  diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c
  index 85cfb60..a673106 100644
  --- a/gcc/dwarf2cfi.c
  +++ b/gcc/dwarf2cfi.c
  @@ -271,11 +271,9 @@ expand_builtin_init_dwarf_reg_sizes (tree
 address)
 if (rnum  DWARF_FRAME_REGISTERS)
  {
HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (mode);
  - enum machine_mode save_mode = reg_raw_mode[i];
HOST_WIDE_INT size;
  + enum machine_mode save_mode = targetm.dwarf_frame_reg_mode (i);
 
  - if (HARD_REGNO_CALL_PART_CLOBBERED (i, save_mode))
  -   save_mode = choose_hard_reg_mode (i, 1, true);
if (dnum == DWARF_FRAME_RETURN_COLUMN)
  {
if (save_mode == VOIDmode)
  diff --git a/gcc/target.def b/gcc/target.def
  index 3a41db1..d5aba51 100644
  --- a/gcc/target.def
  +++ b/gcc/target.def
  @@ -3216,6 +3216,17 @@ If not defined, the default is to return
  @code{NULL_RTX}.,
rtx, (rtx reg),
hook_rtx_rtx_null)
 
  +/* Given a register return the mode of the corresponding DWARF frame
  +   register.  */
  +DEFHOOK
  +(dwarf_frame_reg_mode,
  + Given a register, this hook should return the mode which the\n\
  +corresponding Dwarf frame register should have.  This is normally\n\
  +used to return a smaller mode than the raw mode to prevent call\n\
  +clobbered parts of a register altering the frame register size.,
  + enum machine_mode, (int regno),
  + default_dwarf_frame_reg_mode)
  +
   /* If expand_builtin_init_dwarf_reg_sizes needs to fill in table
  entries not corresponding directly to registers below
  FIRST_PSEUDO_REGISTER, this hook should generate the necessary
  diff --git a/gcc/targhooks.c b/gcc/targhooks.c
  index 0f27a5a..765bf3b 100644
  --- a/gcc/targhooks.c
  +++ b/gcc/targhooks.c
  @@ -1456,6 +1456,19 @@ default_debug_unwind_info (void)
 return UI_NONE;
   }
 
  +/* Determine the correct mode for a Dwarf frame register that
  represents
  +   register REGNO.  */
  +
  +enum machine_mode
  +default_dwarf_frame_reg_mode (int regno)
  +{
  +  enum machine_mode save_mode = reg_raw_mode[regno];
  +
  +  if (HARD_REGNO_CALL_PART_CLOBBERED (regno, save_mode))
  +save_mode = choose_hard_reg_mode (regno, 1, true);
  +  return save_mode;
  +}
  +
   /* To be used by targets where reg_raw_mode doesn't return the right
  mode for registers used in apply_builtin_return and
  apply_builtin_arg.  */
 
  diff --git a/gcc/targhooks.h b/gcc/targhooks.h
  index 4be33f8..fa88679 100644
  --- a/gcc/targhooks.h
  +++ b/gcc/targhooks.h
  @@ -194,6 +194,7 @@ extern int default_label_align_max_skip (rtx);
   extern int default_jump_align_max_skip (rtx);
   extern section * default_function_section(tree decl, enum
  node_frequency freq,
bool startup, bool exit);
  +extern enum machine_mode default_dwarf_frame_reg_mode (int);
   extern enum machine_mode default_get_reg_raw_mode (int);
   extern bool default_keep_leaf_when_profiled ();
 
  --
  1.9.4


[PATCH][match-and-simplify] Complete conversion patterns

2014-09-02 Thread Richard Biener

This completes conversion patterns (apart from commented case
which needs a new IL feature).

Bootstrapped on x86_64-unknown-linux-gnu, applied.

Richard.

2014-09-02  Richard Biener  rguent...@suse.de

* match-conversions.pd: Add more patterns.

Index: gcc/match-conversions.pd
===
--- gcc/match-conversions.pd(revision 214795)
+++ gcc/match-conversions.pd(working copy)
@@ -1,21 +1,42 @@
-#if GIMPLE
-/* Basic strip-useless-type-conversions.  */
-(simplify
-  (convert @0)
-  (if (useless_type_conversion_p (type, TREE_TYPE (@0)))
-   @0))
-#endif
-
-
 /* From fold_unary in order of appearance.  */
 
-#if GENERIC
-/* For GIMPLE this is convered by the useless_type_conversion stripping.  */
+/* Re-association barriers around constants and other re-association
+   barriers can be removed.  */
 (simplify
-  (convert @0)
-  (if (type == TREE_TYPE (@0))
-   @0))
-#endif
+ (paren CONSTANT_CLASS_P@0)
+ @0)
+(simplify
+ (paren (paren @0))
+ (paren @0))
+
+/* Basic strip-useless-type-conversions / strip_nops.  */
+(for cvt in convert view_convert
+ (simplify
+  (cvt @0)
+  (if ((GIMPLE  useless_type_conversion_p (type, TREE_TYPE (@0)))
+   || (GENERIC  type == TREE_TYPE (@0)))
+   @0)))
+
+/* If we have (type) (a CMP b) and type is an integral type, return
+   new expression involving the new type.  Canonicalize
+   (type) (a CMP b) to (a CMP b) ? (type) true : (type) false for
+   non-integral type.
+   Do not fold the result as that would not simplify further, also
+   folding again results in recursions.  */
+/* ???  Eh, do we want sth like (define-ops cmp lt le eq ...) to not
+   repeat this too many times?  */
+(for cmp in lt le eq ne ge gt unordered ordered unlt unle ungt unge uneq ltgt
+ (simplify
+   (convert (cmp@2 @0 @1))
+   (if (TREE_CODE (type) == BOOLEAN_TYPE)
+(cmp @0 @1))
+   /* Not sure if the following makes sense for GIMPLE.  */
+   (if (!INTEGRAL_TYPE_P (type)  !VOID_TYPE_P (type)
+TREE_CODE (type) != VECTOR_TYPE)
+(cond @2
+  { constant_boolean_node (true, type); }
+  { constant_boolean_node (false, type); }
+
 
 /* Convert (T1)(~(T2)X) into ~(T1)X if T1 and T2 are integral types
of the same precision, and X is an integer type not narrower than
@@ -29,6 +50,41 @@
TYPE_PRECISION (type) = TYPE_PRECISION (TREE_TYPE (@1)))
   (bit_not (convert @1
 
+/* Convert (T1)(X * Y) into (T1)X * (T1)Y if T1 is narrower than the
+   type of X and Y (integer types only).  */
+(simplify
+ (convert (mult @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+   INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   TYPE_PRECISION (type)  TYPE_PRECISION (TREE_TYPE (@0)))
+  (if (TYPE_OVERFLOW_WRAPS (type))
+   (mult (convert @0) (convert @1)
+#if 0
+  /* 1) We can't handle the two-conversions-in-a-row below.
+ 2) We can't properly specify the type for the inner conversion
+(unsigned_type_for).  Suggested syntax below.  */
+  (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+   (convert (mult (convert:utype @0) (convert:utype @1
+#endif
+
+
+/* For integral conversions with the same precision or pointer
+   conversions use a NOP_EXPR instead.  */
+(simplify
+  (view_convert @0)
+  (if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+(INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0)))
+   (convert @0)))
+
+/* Strip inner integral conversions that do not change the precision.  */
+(simplify
+  (view_convert (convert@0 @1))
+  (if ((INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+(INTEGRAL_TYPE_P (TREE_TYPE (@1)) || POINTER_TYPE_P (TREE_TYPE (@1)))
+(TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1
+   (view_convert @1)))
+
 
 /* From tree-ssa-forwprop.c:combine_conversions.  */
 
@@ -64,12 +120,9 @@
   handled below, if we are converting something to its own
   type via an object of identical or wider precision, neither
   conversion is needed.  */
-   (if (
-#if GIMPLE
-   useless_type_conversion_p (type, inside_type)
-#else
-   TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (inside_type)
-#endif
+   (if (((GIMPLE  useless_type_conversion_p (type, inside_type))
+|| (GENERIC
+ TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (inside_type)))
 (((inter_int || inter_ptr)  final_int)
|| (inter_float  final_float))
 inter_prec = final_prec)


Re: [PATCH C++] - SD-6 Implementation Part 3 - .

2014-09-02 Thread Jonathan Wakely

On 01/09/14 21:46 -0400, Ed Smith-Rowland wrote:

Index: include/bits/stl_function.h
===
--- include/bits/stl_function.h (revision 214680)
+++ include/bits/stl_function.h (working copy)
@@ -217,6 +217,10 @@
};

#if __cplusplus > 201103L
+
+#define __cpp_lib_transparent_operators 201210
+#define __cpp_lib_generic_associative_lookup 201304


The generic associative lookup feature is not supported.


Index: testsuite/experimental/feat-lib-fund.cc
===
--- testsuite/experimental/feat-lib-fund.cc (revision 0)
+++ testsuite/experimental/feat-lib-fund.cc (working copy)
@@ -0,0 +1,25 @@
+// { dg-options -std=gnu++14 }
+// { dg-do compile }
+
+#include experimental/optional
+#include experimental/string_view
+
+#if !__has_include(experimental/optional)
+#  error experimental/optional
+#endif
+
+//#if !__has_include(experimental/net)
+//#  error experimental/net
+//#endif
+
+//#if !__has_include(experimental/any)
+//#  error experimental/any
+//#endif


This can be uncommented, experimental/any is available.

OK with those changes.

Once committed, can you update https://gcc.gnu.org/gcc-5/changes.html
to say GCC follows the SD-6 recommendations?

Thanks.


Re: [PATCH] Add -fno-instrument-function

2014-09-02 Thread Richard Biener
On Mon, Sep 1, 2014 at 10:25 PM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 [This was an old patch of mine that has been posted before,
 but never made it in]

 This adds a new C/C++ option to force
 __attribute__((no_instrument_function)) on every function compiled.

 This is useful together with LTO. You may want to have the whole
 program compiled with -pg and have to specify that in the LTO
 link, but want to disable it for some specific files. As the
 option works on the frontend level it is already passed through
 properly by LTO.

 Without LTO it is equivalent to not specifying -pg or -mfentry.

 This fixes some missing functionality in the Linux kernel LTO port,
 in particular it allows using the function tracer with LTO kernels.

 Longer term it would be nicer if all suitable options were handled
 like this for LTO by turning them into attributes, but that would
 be a much larger project.

 Passed bootstrap and test suite on x86_64-linux. Ok?

Hmm, why not make -no-pg (does that exist?) and/or -mno-fentry
do this?  That is, I don't see the need for a new option.

Or do it the other way around - change the default to
DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT and make
-pg/-mfentry unset that (or have DECL_INSTRUMENT_FUNCTION_ENTRY_EXIT).

Richard.

 gcc/:

 2014-09-01  Andi Kleen a...@linux.intel.com

 * c.opt (fno-instrument-function): Document.

 gcc/c:

 2014-09-01  Andi Kleen a...@linux.intel.com

 * c-decl.c (start_function): Handle force_no_instrument_function

 gcc/cp:

 2014-09-01  Andi Kleen a...@linux.intel.com

 * decl.c (start_preparsed_function): Handle
 force_no_instrument_function

 gcc/testsuite:

 2014-09-01  Andi Kleen a...@linux.intel.com

 * g++.dg/fno-instrument-function.C: Add.
 * gcc.dg/fno-instrument-function.c: Add.
 ---
  gcc/c-family/c.opt |  4 
  gcc/c/c-decl.c |  3 +++
  gcc/cp/decl.c  |  3 +++
  gcc/doc/invoke.texi|  8 +++-
  gcc/testsuite/g++.dg/fno-instrument-function.C | 18 ++
  gcc/testsuite/gcc.dg/fno-instrument-function.c | 24 
  6 files changed, 59 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/fno-instrument-function.C
  create mode 100644 gcc/testsuite/gcc.dg/fno-instrument-function.c

 diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
 index 210a099..2aabd23 100644
 --- a/gcc/c-family/c.opt
 +++ b/gcc/c-family/c.opt
 @@ -1118,6 +1118,10 @@ Enum(ivar_visibility) String(public) 
 Value(IVAR_VISIBILITY_PUBLIC)
  EnumValue
  Enum(ivar_visibility) String(package) Value(IVAR_VISIBILITY_PACKAGE)

 +fno-instrument-function
 +C C++ ObjC ObjC++ RejectNegative Report Var(force_no_instrument_function)
 +Force __attribute__((no_instrument_function)) for all functions in 
 translation unit.
 +
  fnonansi-builtins
  C++ ObjC++ Var(flag_no_nonansi_builtin, 0)

 diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
 index b4995a6..493240f 100644
 --- a/gcc/c/c-decl.c
 +++ b/gcc/c/c-decl.c
 @@ -8044,6 +8044,9 @@ start_function (struct c_declspecs *declspecs, struct 
 c_declarator *declarator,
if (current_scope == file_scope)
  maybe_apply_pragma_weak (decl1);

 +  if (force_no_instrument_function)
 +DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (decl1) = 1;
 +
/* Warn for unlikely, improbable, or stupid declarations of `main'.  */
if (warn_main  MAIN_NAME_P (DECL_NAME (decl1)))
  {
 diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
 index d03f8a4..505ad50 100644
 --- a/gcc/cp/decl.c
 +++ b/gcc/cp/decl.c
 @@ -13251,6 +13251,9 @@ start_preparsed_function (tree decl1, tree attrs, int 
 flags)
 lookup_attribute (noinline, attrs))
  warning (0, inline function %q+D given attribute noinline, decl1);

 +  if (force_no_instrument_function)
 +DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (decl1) = 1;
 +
/* Handle gnu_inline attribute.  */
if (GNU_INLINE_P (decl1))
  {
 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
 index d15d4a9..51b8d20 100644
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
 @@ -169,7 +169,7 @@ in the following sections.
  -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol
  -fno-asm  -fno-builtin  -fno-builtin-@var{function} @gol
  -fhosted  -ffreestanding -fopenmp -fopenmp-simd -fms-extensions @gol
 --fplan9-extensions -trigraphs  -traditional  -traditional-cpp @gol
 +-fplan9-extensions -trigraphs  -traditional  -traditional-cpp 
 -fno-instrument-function @gol
  -fallow-single-precision  -fcond-mismatch -flax-vector-conversions @gol
  -fsigned-bitfields  -fsigned-char @gol
  -funsigned-bitfields  -funsigned-char}
 @@ -1971,6 +1971,12 @@ Allow implicit conversions between vectors with 
 differing numbers of
  elements and/or incompatible element types.  This option should not be
  used for new code.

 +@item -fno-instrument-function
 

Re: [PATCH] Avoid inserting dead code in PRE, do less work

2014-09-02 Thread Richard Biener
On Mon, 1 Sep 2014, Richard Biener wrote:

 
 The following patch tries to work towards fixing PR62291 by moving
 NEW_SETS/AVAIL_OUT adding strictly to insert_into_preds_of_block
 and the value / expression we wanted to insert.  If doing that for
 other unrelated expressions this may cause fake partial
 redundancies to be detected and dead code will be inserted such
 as for gcc.dg/tree-ssa/ssa-pre-28.c which is now fixed.
 
 The idea is that we could now simulate insertion and its recursion
 without actually performing the insertions (which requires AVAIL_OUT)
 and instead postpone that to elimination time.
 
 Well.  Idea...
 
 Bootstrap and regtest running on x86_64-unknown-linux-gnu.

So this doesn't work (it wrecks gcc.c-torture/compile/pr43415.c
which endlessly inserts via find_or_generate_expression).

Which gets me back to the point that find_or_generate_expression
isn't a good implementation to fix PR37997 (gcc.dg/tree-ssa/ssa-pre-28.c).

Anyway, I'll put this patch on hold (though I certainly would like
to remove that PR37997-fixing code ...).

Richard.

 Richard.
 
 2014-09-01  Richard Biener  rguent...@suse.de
 
   * tree-ssa-pre.c (find_or_generate_expression): Expand comment.
   (create_expression_by_pieces): Do not add to NEW_SETS or
   AVAIL_OUT here.
   (insert_into_preds_of_block): Instead do it here and only
   for the partial redundant value we inserted.
 
 Index: gcc/tree-ssa-pre.c
 ===
 --- gcc/tree-ssa-pre.c(revision 214795)
 +++ gcc/tree-ssa-pre.c(working copy)
 @@ -2797,9 +2797,11 @@ find_or_generate_expression (basic_block
return NULL_TREE;
  }
  
 -  /* It must be a complex expression, so generate it recursively.  Note
 - that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
 - where the insert algorithm fails to insert a required expression.  */
 +  /* It must be a complex expression, so generate it recursively.
 + Note that this is only necessary to handle cases like
 + gcc.dg/tree-ssa/ssa-pre-28.c where the insert algorithm fails to
 + insert a required expression because the dependent expression
 + isn't partially redundant.  */
bitmap exprset = value_expressions[lookfor];
bitmap_iterator bi;
unsigned int i;
 @@ -2846,7 +2848,6 @@ create_expression_by_pieces (basic_block
unsigned int value_id;
gimple_stmt_iterator gsi;
tree exprtype = type ? type : get_expr_type (expr);
 -  pre_expr nameexpr;
gimple newstmt;
  
switch (expr-kind)
 @@ -2941,17 +2942,12 @@ create_expression_by_pieces (basic_block
   {
 gimple stmt = gsi_stmt (gsi);
 tree forcedname = gimple_get_lhs (stmt);
 -   pre_expr nameexpr;
  
 if (TREE_CODE (forcedname) == SSA_NAME)
   {
 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
 VN_INFO_GET (forcedname)-valnum = forcedname;
 VN_INFO (forcedname)-value_id = get_next_value_id ();
 -   nameexpr = get_or_alloc_expr_for_name (forcedname);
 -   add_to_value (VN_INFO (forcedname)-value_id, nameexpr);
 -   bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
 -   bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
   }
   }
gimple_seq_add_seq (stmts, forced_stmts);
 @@ -2979,12 +2975,6 @@ create_expression_by_pieces (basic_block
VN_INFO (name)-valnum = sccvn_valnum_from_value_id (value_id);
if (VN_INFO (name)-valnum == NULL_TREE)
  VN_INFO (name)-valnum = name;
 -  gcc_assert (VN_INFO (name)-valnum != NULL_TREE);
 -  nameexpr = get_or_alloc_expr_for_name (name);
 -  add_to_value (value_id, nameexpr);
 -  if (NEW_SETS (block))
 -bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
 -  bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
  
pre_stats.insertions++;
if (dump_file  (dump_flags  TDF_DETAILS))
 @@ -3061,7 +3051,11 @@ insert_into_preds_of_block (basic_block
 nophi = true;
 continue;
   }
 -   avail[pred-dest_idx] = get_or_alloc_expr_for_name (builtexpr);
 +   pre_expr nameexpr = get_or_alloc_expr_for_name (builtexpr);
 +   avail[pred-dest_idx] = nameexpr;
 +   add_to_value (get_expr_value_id (eprime), nameexpr);
 +   bitmap_value_replace_in_set (NEW_SETS (bprime), nameexpr);
 +   bitmap_value_replace_in_set (AVAIL_OUT (bprime), nameexpr);
 insertions = true;
   }
else if (eprime-kind == CONSTANT)
 
 Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c(revision 214795)
 +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c(working copy)
 @@ -15,7 +15,13 @@ int foo (int i, int b, int result)
  }
  
  /* We should insert i + 1 into the if (b) path as well as the simplified
 -   i + 1  -2 expression.  And 

[C++ Patch] DR 1453

2014-09-02 Thread Paolo Carlini

Hi,

while looking into c++/58102 and DR 1405 I noticed that we don't 
implement DR 1453 either, sort of dual issue with volatile instead of 
mutable. Tested x86_64-linux.


Thanks,
Paolo.


/cp
2014-09-02  Paolo Carlini  paolo.carl...@oracle.com

DR 1453
* class.c (check_field_decls): A class of literal type cannot have
volatile non-static data members and base classes.
(explain_non_literal_class): Update.

/testsuite
2014-09-02  Paolo Carlini  paolo.carl...@oracle.com

DR 1453
* g++.dg/cpp0x/constexpr-volatile.C: New.
* g++.dg/ext/is_literal_type2.C: Likewise.
Index: cp/class.c
===
--- cp/class.c  (revision 214808)
+++ cp/class.c  (working copy)
@@ -3528,9 +3528,11 @@ check_field_decls (tree t, tree *access_decls,
CLASSTYPE_NON_AGGREGATE (t) = 1;
 
   /* If at least one non-static data member is non-literal, the whole
- class becomes non-literal.  Note: if the type is incomplete we
-will complain later on.  */
-  if (COMPLETE_TYPE_P (type)  !literal_type_p (type))
+ class becomes non-literal.  Per Core/1453, volatile non-static
+data members and base classes are also not allowed.
+Note: if the type is incomplete we will complain later on.  */
+  if (COMPLETE_TYPE_P (type)
+  (!literal_type_p (type) || CP_TYPE_VOLATILE_P (type))) 
 CLASSTYPE_LITERAL_P (t) = false;
 
   /* A standard-layout class is a class that:
@@ -5431,6 +5433,9 @@ explain_non_literal_class (tree t)
  if (CLASS_TYPE_P (ftype))
explain_non_literal_class (ftype);
}
+ if (CP_TYPE_VOLATILE_P (ftype))
+   inform (0,   non-static data member %q+D has 
+   volatile type, field);
}
 }
 }
Index: testsuite/g++.dg/cpp0x/constexpr-volatile.C
===
--- testsuite/g++.dg/cpp0x/constexpr-volatile.C (revision 0)
+++ testsuite/g++.dg/cpp0x/constexpr-volatile.C (working copy)
@@ -0,0 +1,26 @@
+// DR 1453
+// { dg-do compile { target c++11 } }
+
+struct S {
+  constexpr S() : n{} { }
+  volatile int n;
+};
+
+constexpr S s;  // { dg-error literal }
+
+struct Z {
+  volatile int m;
+};
+
+struct T {
+  constexpr T() : n{} { }
+  Z n;
+};
+
+constexpr T t;  // { dg-error literal }
+
+struct U : Z {
+  constexpr U() : Z{} { }
+};
+
+constexpr U u;  // { dg-error literal }
Index: testsuite/g++.dg/ext/is_literal_type2.C
===
--- testsuite/g++.dg/ext/is_literal_type2.C (revision 0)
+++ testsuite/g++.dg/ext/is_literal_type2.C (working copy)
@@ -0,0 +1,26 @@
+// DR 1453
+// { dg-do compile { target c++11 } }
+
+struct S {
+  constexpr S() : n{} { }
+  volatile int n;
+};
+
+static_assert(!__is_literal_type(S), );
+
+struct Z {
+  volatile int m;
+};
+
+struct T {
+  constexpr T() : n{} { }
+  Z n;
+};
+
+static_assert(!__is_literal_type(T), );
+
+struct U : Z {
+  constexpr U() : Z{} { }
+};
+
+static_assert(!__is_literal_type(U), );


Re: [patch] No allocation for empty unordered containers

2014-09-02 Thread Jonathan Wakely

On 30/08/14 20:03 +0200, François Dumont wrote:

Any news for my patch proposals ?

Regarding documentation of default minimum number of buckets, I don't 
know where it has been documented but why do we need to document it 
separately ? Could it be taken care of by Doxygen ? Can't it get the 
default value from the code itself ? If not we could document it 
ourselves next to the code rather than in a distinct file.


It's OK to document it with a Doxygen comment, although I think it
would be better in doc/xml/manual/containers.xml.

I'm reviewing the rest of the patch today, thanks for your patience.



[RFA:] testsuite: robustify g++.old-deja/g++.eh/badalloc1.C for 64-bit systems

2014-09-02 Thread Hans-Peter Nilsson
In a native x86_64-linux toolchain in which
eh-table-registration is done explicitly (i.e. dl_iterate_phdr
and PT_GNU_EH_FRAME is *not* assumed, as that eliminates the
issue), the memory overhead for exception-initialization goes
beyond the 32768 bytes assumed in badalloc1.C and the test fails
for reasons not intended by the test.  You may think that's
uninteresting, but presumably there are other 64-bit-systems,
perhaps even GNU-based, that act similarly.

For EH tables registered with the __register_frame_info scheme
(let's call it eh-registry as opposed to eh-phdr), the incoming
tables are not assumed to be sorted.  EH initialization then
does an initial sorting at the first exception, in which there
are calls to malloc for arrays for the sorted tables.  This is
noticeable in badalloc1.C as it overrides malloc.  All this
happens at that first try{fn_throw();} with the related
comment, i.e. before the fail = 1 and the actual test in
badalloc1.C.  (There are other calls to malloc for other
unrelated initialization tasks, but for glibc systems these
resolve to a malloc in the dynamic linker.)  The sequence of
calls to malloc in badalloc1.C go like this:

Size   Purpose  Function name
132Core exception data. __cxxabiv1::__cxa_allocate_exception
88 EH table for badalloc1 start_fde_sort
   program, 9 FDE:s for the  (ditto)
   linear table.  
88 Ditto the erratic table.   
19176  Similar 2395 FDE:s for the   
   libstdc++ library, linear. 
19176  Ditto the erratic table.   
   *boom*

The boom is simply the arena size check failing in the
badalloc1.C malloc:
  // Verify that we didn't run out of memory before getting initialized.
  if (pos > arena_size)
abort ();

It seems the arena_size=32768 bytes estimate was from the
32-bit-systems era (svn logs indicate 2000).  Just scaling it
accordingly works fine, and we get to see the rest of the
allocations:
1344   166 FDE:s in libgcc_s, linear table
1344   Ditto erratic.

For a -m32 run, the corresponding allocation-size series is
100, 66, 66, 9592, 9592, 648, 648.

(*) for one reason or another.  Maybe the GNU linker is not used
or *really* outdated (before 2001-12-13, 2.12) or glibc is
*really* outdated (before 2001-07-25, 2.2.4).  Or inhibit_libc
accidentally set, or a compatibility scheme forcing eh-registry.
More about that in a later post.

Having investigated the related test-suite failure, I suggest to
eliminate it by robustifying the arena size a bit (or 32 :).  I
don't touch the other arena_size definitions because (1) those
numbers are presumably already fine for the related systems,
those still alive, and (2) I don't like changing stuff I cannot
test.

Other observations: I guess there are similar copyright notices
in the test-suite that may need some general attention.  The xfail
list may benefit from tweaking; at least replacing the xstormy16
with int32plus or similar to cover the array size overflowing
16-bit addresses.

Ok to commit?  (Note the changelog-conditional-prefix
continued-line format.)

gcc/testsuite:

* g++.old-deja/g++.eh/badalloc1.C [!STACK_SIZE  !__FreeBSD__]
[!__sun__  !__hpux__] (arena_size): Scale according to
target pointer size.

Index: g++.old-deja/g++.eh/badalloc1.C
===
--- g++.old-deja/g++.eh/badalloc1.C (revision 214810)
+++ g++.old-deja/g++.eh/badalloc1.C (working copy)
@@ -3,7 +3,7 @@
 // itself call malloc(), and will fail if there is no more
 // memory available.
 // { dg-do run { xfail { { xstormy16-*-* *-*-darwin[3-7]* } || vxworks_rtp } } 
}
-// Copyright (C) 2000, 2002, 2003, 2010, 2012 Free Software Foundation, Inc.
+// Copyright (C) 2000, 2002, 2003, 2010, 2012, 2014 Free Software Foundation, 
Inc.
 // Contributed by Nathan Sidwell 6 June 2000 nat...@codesourcery.com
 
 // Check we can throw a bad_alloc exception when malloc dies.
@@ -23,7 +23,10 @@ const int arena_size = 256;
 // FreeBSD 5 now requires over 131072 bytes.
 const int arena_size = 262144;
 #else
-const int arena_size = 32768;
+// Because pointers make up the bulk of our exception-initialization
+// allocations, we scale by the pointer size from the original
+// 32-bit-systems-based estimate.
+const int arena_size = 32768 * ((sizeof (void *) + 3)/4);
 #endif
 #endif
 
brgds, H-P


[PATCH][PR debug/60655] Power/GCC: Reject cross-section symbol subtraction

2014-09-02 Thread Maciej W. Rozycki
Hi,

 Similarly to ARM, where this issue was seen originally, and likely many 
other targets, the Power ABI does not appear to have a relocation defined 
to support taking a difference of two symbols in different sections each. 
This is seen as a failure in gcc.c-torture/compile/pr60655-2.c:

Executing on host: powerpc-linux-gnu-gcc  -fno-diagnostics-show-caret 
-fdiagnostics-color=never   -O3 -g  -w -c  -o pr60655-2.o 
.../gcc/testsuite/gcc.c-torture/compile/pr60655-2.c(timeout = 300)
/tmp/ccAfNLMj.s: Assembler messages:
/tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' 
{.bss section}
/tmp/ccAfNLMj.s:932: Error: expression too complex
compiler exited with status 1
output is:
/tmp/ccAfNLMj.s: Assembler messages:
/tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' 
{.bss section}
/tmp/ccAfNLMj.s:932: Error: expression too complex

FAIL: gcc.c-torture/compile/pr60655-2.c  -O3 -g  (test for excess errors)
Excess errors:
/tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' 
{.bss section}
/tmp/ccAfNLMj.s:932: Error: expression too complex

Here's a port of the original ARM fix (commit 209269), that removes the 
failure for me.

 Regression-tested with the following powerpc-gnu-linux multilibs:

-mcpu=603e
-mcpu=603e -msoft-float
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe
-mcpu=7400 -maltivec -mabi=altivec
-mcpu=e6500 -maltivec -mabi=altivec
-mcpu=e5500 -m64
-mcpu=e6500 -m64 -maltivec -mabi=altivec

 OK for trunk and 4.9?

2014-09-02  Maciej W. Rozycki  ma...@codesourcery.com

PR debug/60655
* config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p):
Reject MINUS with SYM_REFs in different sections.

  Maciej

gcc-rs6000-minus-not-ok-for-debug.diff
Index: gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.c
===
--- gcc-fsf-trunk-quilt.orig/gcc/config/rs6000/rs6000.c 2014-08-26 
20:30:10.348973028 +0100
+++ gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.c  2014-09-01 
17:09:23.748927487 +0100
@@ -6974,7 +6974,13 @@ rs6000_delegitimize_address (rtx orig_x)
 
 /* Return true if X shouldn't be emitted into the debug info.
The linker doesn't like .toc section references from
-   .debug_* sections, so reject .toc section symbols.  */
+   .debug_* sections, so reject .toc section symbols.
+
+   Also as a temporary fix for PR60655 we reject certain MINUS
+   expressions.  Ideally we need to handle most of these cases in
+   the generic part but currently we reject minus (..) (sym_ref).
+   We try to ameliorate the case with minus (sym_ref1) (sym_ref2)
+   where they are in the same section.  */
 
 static bool
 rs6000_const_not_ok_for_debug_p (rtx x)
@@ -6988,6 +6994,35 @@ rs6000_const_not_ok_for_debug_p (rtx x)
return true;
 }
 
+  if (GET_CODE (x) == MINUS)
+{
+  tree decl_op0 = NULL;
+  tree decl_op1 = NULL;
+
+  if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+   {
+decl_op1 = SYMBOL_REF_DECL (XEXP (x, 1));
+if (decl_op1
+ GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+ (decl_op0 = SYMBOL_REF_DECL (XEXP (x, 0
+  {
+if ((TREE_CODE (decl_op1) == VAR_DECL
+ || TREE_CODE (decl_op1) == CONST_DECL)
+ (TREE_CODE (decl_op0) == VAR_DECL
+|| TREE_CODE (decl_op0) == CONST_DECL))
+  return (get_variable_section (decl_op1, false)
+  != get_variable_section (decl_op0, false));
+
+if (TREE_CODE (decl_op1) == LABEL_DECL
+ TREE_CODE (decl_op0) == LABEL_DECL)
+  return (DECL_CONTEXT (decl_op1)
+  != DECL_CONTEXT (decl_op0));
+  }
+
+return true;
+   }
+}
+
   return false;
 }
 


Re: Fix libgomp crash without TLS (PR42616)

2014-09-02 Thread Varvara Rainchik
May I use gomp_free_thread as a destructor for pthread_key_create?
Then I'll make initial_thread_tls_data global for the first case, but
how can I differentiate thread created by gomp_thread_start (second
case)?

2014-09-01 14:51 GMT+04:00 Jakub Jelinek ja...@redhat.com:
 On Fri, Aug 29, 2014 at 10:40:57AM -0700, Richard Henderson wrote:
 On 08/06/2014 03:05 AM, Varvara Rainchik wrote:
  * libgomp.h (gomp_thread): For non TLS case create thread data.
  * team.c (create_non_tls_thread_data): New function.
 
 
  ---
  diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
  index a1482cc..cf3ec8f 100644
  --- a/libgomp/libgomp.h
  +++ b/libgomp/libgomp.h
  @@ -479,9 +479,15 @@ static inline struct gomp_thread *gomp_thread (void)
  }
  #else
  extern pthread_key_t gomp_tls_key;
  +extern struct gomp_thread *create_non_tls_thread_data (void);
  static inline struct gomp_thread *gomp_thread (void)
  {
  -  return pthread_getspecific (gomp_tls_key);
  +  struct gomp_thread *thr = pthread_getspecific (gomp_tls_key);
  +  if (thr == NULL)
  +  {
  +thr = create_non_tls_thread_data ();
  +  }
  +  return thr;
  }

 This should never happen.

 I guess it can happen if you mix up explicit pthread_create and libgomp APIs.
 initialize_team will only initialize it in the initial thread, while if you
 use #pragma omp ... or omp_* calls from a thread created with
 pthread_create, in the !HAVE_TLS case pthread_getspecific will return NULL.

 Now, the patch doesn't handle that case completely though (and is badly
 formatted), the problem is that if we allocate in the !HAVE_TLS case
 in non-initial thread the TLS data, we want to free them again, so that
 would mean pthread_key_create with non-NULL destructor, and then we need to
 differentiate in between the 3 cases - key equal to initial_thread_tls_data
 (would need to move out of the block context), no freeing needed, thread
 created by gomp_thread_start, no freeing needed, otherwise free.

 The thread-specific data is set in gomp_thread_start and initialize_team.

 Where are you getting a call to gomp_thread that hasn't been through one of
 those functions?

 Jakub


Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass

2014-09-02 Thread Segher Boessenkool
On Tue, Sep 02, 2014 at 10:02:38AM +0800, Bin.Cheng wrote:
  Archaeology suggests this check is because the clobber might be an
  earlyclobber.  Which seems silly: how can it be a valid insn at all
  in that case?  It seems to me the check can just be removed.  That
  will hide your issue, maybe even solve it (but I doubt it).
  Silly for other reasons, namely that earlyclobber doesn't come into play
  until after combine (register allocation and later).
 
  The last change to this code was by Ulrich (cc:ed); in that thread (June
  2004, mostly not threaded in the mail archive, broken MUAs :-( ) it was
  said that any clobber should be considered an earlyclobber (an RTL insn
  can expand to multiple machine instructions, for example).  But I don't
  see how that can matter for dest here (the dest of insn, that's 76
  in the example), only for src.
 
  The version of flags set in 76 obviously dies in 77 (it clobbers the
  reg after all), but there is no way it could clobber it before it uses
  it, that just makes no sense.  And in the combined insn that version of
  flags does not exist at all.
 Agreed, otherwise it would be another uninitialized use problem.
 Maybe the check is too strict here?  Do you have some archived page
 address for that, just saving us some time for digging.

http://gcc.gnu.org/ml/gcc-patches/2004-06/msg00994.html
(and look in that month's archives for the rest of the messages).

 My only concern is, logic in distribute_notes should also be revisited
 under this BZ.  I think the issue will be hidden by changes we are
 talking about in can_combine_p.

Yes.  Unless we disallow all combinations that *would* cause problems :-)


Segher


Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass

2014-09-02 Thread Segher Boessenkool
On Mon, Sep 01, 2014 at 09:28:09PM -0600, Jeff Law wrote:
 Note that in this case we're talking about a hard register, not a pseudo.
 I was referring to r84 in Bin's message, not the condition code 
 register.  Unless I missed something it's set at the start of the 
 sequence to the value 0, then later to -ltu(flags,cc,0).

Bin said that the three-insn combination is refused because of the flags
register, not r84.  So either the four-insn combination should do those
same checks, or we should allow it, or both.

 There's no good reason I can see why we're reusing a pseudo like that. 
 I suspect that if we go back, fix whatever's creating that lame sequence 
 and simply reject combinations involving a pseudo set more than once it 
 won't affect code in any real way.  If we wanted to be anal about it, 
 we'd put in some kind of debugging note and someone could do some wider 
 scale testing.

All that, too :-)  Although it all seems to work fine for two-insn and
three-insn combinations.


Segher


Re: [PATCH] support ggc hash_map and hash_set

2014-09-02 Thread Trevor Saunders
On Tue, Sep 02, 2014 at 10:36:27AM +0200, Richard Biener wrote:
 On Tue, Sep 2, 2014 at 3:56 AM,  tsaund...@mozilla.com wrote:
  From: Trevor Saunders tsaund...@mozilla.com
 
  Hi,
 
  There are still some issues to make this work really nicely, but this part 
  is
  probably good enough it's worth reviewing.
 
  For one thing you can't use ggc hash_map or set in front ends with some 
  types
  or gengtype will decide to put the overloads of the marking routines it
  provides in a front end file instead of the one it choose before breaking 
  other
  front ends.  However that seems to be an unrelated issue you can trigger it
  without using hash_map/set, so we might as well solve it separately.
 
  I had to have the entry marking functions for set delegate to the traits 
  class
  because gcc  4.9.1 issues clearly bogus errors if you inline the code from 
  the
  traits implementation.  We may well want to make map work the same way at 
  some
  point to enable some of the special GTY attributes like if_marked, but it
  doesn't seem to be necessary right now.
 
  bootstrapped + regtested without regressions on x86_64-unknown-linux-gnu, 
  ok?
 
 Ok if you make the gcc_assert()s in the marking routines 
 gcc_checking_assert()s.

sure

 Btw - do manual markers need any special support for finalizers?

I don't think so since marking and finalizers are basically independent.

 Does the hash table need any special support to make finalizers efficient
 (avoid recording for each entry if stored in-place?)

Well, it actually just stores one for the vector of elements.  However I
think right now the destructor may be called twice: once by the
Traits::remove call in ~hash_table (), and once as a finalizer.  It
probably actually makes sense to stop registering finalizers for the
entries vector and just use the call to Traits::remove, which would be
consistent with non gc hash maps.

Trev

 
 Thanks,
 Richard.
 
  Trev
 
  gcc/ChangeLog:
 
  2014-09-01  Trevor Saunders  tsaund...@mozilla.com
 
  * alloc-pool.c: Include coretypes.h.
  * cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c,
  function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and
  hash_set instead of htab.
  * ggc-page.c (in_gc): New variable.
  (ggc_free): Do nothing if a collection is taking place.
  (ggc_collect): Set in_gc appropriately.
  * ggc.h (gt_ggc_mx(const char *)): New function.
  (gt_pch_nx(const char *)): Likewise.
  (gt_ggc_mx(int)): Likewise.
  (gt_pch_nx(int)): Likewise.
  * hash-map.h (hash_map::hash_entry::ggc_mx): Likewise.
  (hash_map::hash_entry::pch_nx): Likewise.
  (hash_map::hash_entry::pch_nx_helper): Likewise.
  (hash_map::hash_map): Adjust.
  (hash_map::create_ggc): New function.
  (gt_ggc_mx): Likewise.
  (gt_pch_nx): Likewise.
  * hash-set.h (default_hashset_traits::ggc_mx): Likewise.
  (default_hashset_traits::pch_nx): Likewise.
  (hash_set::hash_entry::ggc_mx): Likewise.
  (hash_set::hash_entry::pch_nx): Likewise.
  (hash_set::hash_entry::pch_nx_helper): Likewise.
  (hash_set::hash_set): Adjust.
  (hash_set::create_ggc): New function.
  (hash_set::elements): Likewise.
  (gt_ggc_mx): Likewise.
  (gt_pch_nx): Likewise.
  * hash-table.h (hash_table::hash_table): Adjust.
  (hash_table::m_ggc): New member.
  (hash_table::~hash_table): Adjust.
  (hash_table::expand): Likewise.
  (hash_table::empty): Likewise.
  (gt_ggc_mx): New function.
  (hashtab_entry_note_pointers): Likewise.
  (gt_pch_nx): Likewise.
 
 
  diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c
  index 0d31835..bfaa0e4 100644
  --- a/gcc/alloc-pool.c
  +++ b/gcc/alloc-pool.c
  @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 
   #include config.h
   #include system.h
  +#include coretypes.h
   #include alloc-pool.h
   #include hash-table.h
   #include hash-map.h
  diff --git a/gcc/cgraph.h b/gcc/cgraph.h
  index 879899c..030a1c7 100644
  --- a/gcc/cgraph.h
  +++ b/gcc/cgraph.h
  @@ -1604,7 +1604,6 @@ struct cgraph_2node_hook_list;
 
   /* Map from a symbol to initialization/finalization priorities.  */
   struct GTY(()) symbol_priority_map {
  -  symtab_node *symbol;
 priority_type init;
 priority_type fini;
   };
  @@ -1872,7 +1871,7 @@ public:
 htab_t GTY((param_is (symtab_node))) assembler_name_hash;
 
 /* Hash table used to hold init priorities.  */
  -  htab_t GTY ((param_is (symbol_priority_map))) init_priority_hash;
  +  hash_mapsymtab_node *, symbol_priority_map *init_priority_hash;
 
 FILE* GTY ((skip)) dump_file;
 
  diff --git a/gcc/dbxout.c b/gcc/dbxout.c
  index 946f1d1..d856bdd 100644
  --- a/gcc/dbxout.c
  +++ b/gcc/dbxout.c
  @@ -2484,12 +2484,9 @@ dbxout_expand_expr (tree expr)
   /* Helper function for output_used_types.  Queue one entry from the
  used types hash to be output.  */
 
  -static int
  -output_used_types_helper (void 

Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass

2014-09-02 Thread Ulrich Weigand
Segher Boessenkool wrote:
 On Mon, Sep 01, 2014 at 10:39:10AM -0600, Jeff Law wrote:
  On 09/01/14 05:38, Segher Boessenkool wrote:
  On Mon, Sep 01, 2014 at 11:36:07AM +0800, Bin.Cheng wrote:
  In the testcase (and comment in the proposed patch), why is combine
  combining four insns at all?  That means it rejected combining just the
  first three.  Why did it do that?
  It is explicitly rejected by the code below in can_combine_p.
  
 if (GET_CODE (PATTERN (i3)) == PARALLEL)
   for (i = XVECLEN (PATTERN (i3), 0) - 1; i = 0; i--)
 if (GET_CODE (XVECEXP (PATTERN (i3), 0, i)) == CLOBBER)
   {
 /* Don't substitute for a register intended as a clobberable
operand.  */
 rtx reg = XEXP (XVECEXP (PATTERN (i3), 0, i), 0);
 if (rtx_equal_p (reg, dest))
   return 0;
  
  Since insn i2 in the list of i0/i1/i2 as below contains parallel
  clobber of dest_of_insn76/use_of_insn77.
  32: r84:SI=0
  76: flags:CC=cmp(r84:SI,0x1)
 REG_DEAD r84:SI
  77: {r84:SI=-ltu(flags:CC,0);clobber flags:CC;}
 REG_DEAD flags:CC
 REG_UNUSED flags:CC
  
  Archaeology suggests this check is because the clobber might be an
  earlyclobber.  Which seems silly: how can it be a valid insn at all
  in that case?  It seems to me the check can just be removed.  That
  will hide your issue, maybe even solve it (but I doubt it).
  Silly for other reasons, namely that earlyclobber doesn't come into play 
  until after combine (register allocation and later).
 
 The last change to this code was by Ulrich (cc:ed); in that thread (June
 2004, mostly not threaded in the mail archive, broken MUAs :-( ) it was
 said that any clobber should be considered an earlyclobber (an RTL insn
 can expand to multiple machine instructions, for example).  But I don't
 see how that can matter for dest here (the dest of insn, that's 76
 in the example), only for src.
 
 The version of flags set in 76 obviously dies in 77 (it clobbers the
 reg after all), but there is no way it could clobber it before it uses
 it, that just makes no sense.  And in the combined insn that version of
 flags does not exist at all.

This seems the time period where the email archive is not fully complete;
some of the mails of that 2004 thread apparently were not linked into the
monthly thread list.  This archive seems to have them all:
http://marc.info/?t=108747834900012r=1w=2

In any case, this test in can_combine_p rejects a combination for *two*
different issues.  One is the earlyclobber problem, which is what that
2004 thread was about, and which my patch back then relaxed for fixed
hard register.

However, this doesn't seem to apply to the example above; that is really
about the second problem: don't substitute into a clobber.

I understand the reason why this particular substitution is rejected is
simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1)
into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is
invalid RTL.

Now I guess this check could be relaxed if somewhere else in combine we'd
recognize the substitution into a clobber and simply omit it in that case.

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



[PATCH][match-and-simplify] Fix single RHS code-gen

2014-09-02 Thread Richard Biener

Apparently we didn't exercise this before and thus it has gone
unnoticed that we don't properly special case single-RHSs on
GIMPLE.

Fixed as follows.

Bootstrapped on x86_64-unknown-linux-gnu, applied.

Richard.

2014-09-02  Richard Biener  rguent...@suse.de

* gimple-match-head.c (maybe_build_generic_op): New function.
(maybe_push_res_to_seq): Use it.
* gimple-match.h (maybe_build_generic_op): Declare.
* gimple-fold.c (fold_stmt_1): Use maybe_build_generic_op.

Index: gcc/gimple-match-head.c
===
--- gcc/gimple-match-head.c (revision 214795)
+++ gcc/gimple-match-head.c (working copy)
@@ -267,6 +267,27 @@ gimple_resimplify3 (gimple_seq *seq,
 }
 
 
+/* If in GIMPLE expressions with CODE go as single-rhs build
+   a GENERIC tree for that expression into *OP0.  */
+
+void
+maybe_build_generic_op (enum tree_code code, tree type,
+   tree *op0, tree op1, tree op2)
+{
+  switch (code)
+{
+case REALPART_EXPR:
+case IMAGPART_EXPR:
+case VIEW_CONVERT_EXPR:
+  *op0 = build1 (code, type, *op0);
+  break;
+case BIT_FIELD_REF:
+  *op0 = build3 (code, type, *op0, op1, op2);
+  break;
+default:;
+}
+}
+
 /* Push the exploded expression described by RCODE, TYPE and OPS
as a statement to SEQ if necessary and return a gimple value
denoting the value of the expression.  If RES is not NULL
@@ -286,8 +307,6 @@ maybe_push_res_to_seq (code_helper rcode
return ops[0];
   if (!seq)
return NULL_TREE;
-  if (!res)
-   res = make_ssa_name (type, NULL);
   /* Play safe and do not allow abnormals to be mentioned in
  newly created statements.  */
   if ((TREE_CODE (ops[0]) == SSA_NAME
@@ -299,6 +318,9 @@ maybe_push_res_to_seq (code_helper rcode
   TREE_CODE (ops[2]) == SSA_NAME
   SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2])))
return NULL_TREE;
+  if (!res)
+   res = make_ssa_name (type, NULL);
+  maybe_build_generic_op (rcode, type, ops[0], ops[1], ops[2]);
   gimple new_stmt = gimple_build_assign_with_ops (rcode, res,
  ops[0], ops[1], ops[2]);
   gimple_seq_add_stmt_without_update (seq, new_stmt);
@@ -311,8 +333,6 @@ maybe_push_res_to_seq (code_helper rcode
   tree decl = builtin_decl_implicit (rcode);
   if (!decl)
return NULL_TREE;
-  if (!res)
-   res = make_ssa_name (type, NULL);
   unsigned nargs = type_num_arguments (TREE_TYPE (decl));
   gcc_assert (nargs = 3);
   /* Play safe and do not allow abnormals to be mentioned in
@@ -326,6 +346,8 @@ maybe_push_res_to_seq (code_helper rcode
   TREE_CODE (ops[2]) == SSA_NAME
   SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2])))
return NULL_TREE;
+  if (!res)
+   res = make_ssa_name (type, NULL);
   gimple new_stmt = gimple_build_call (decl, nargs, ops[0], ops[1], 
ops[2]);
   gimple_call_set_lhs (new_stmt, res);
   gimple_seq_add_stmt_without_update (seq, new_stmt);
Index: gcc/gimple-match.h
===
--- gcc/gimple-match.h  (revision 214795)
+++ gcc/gimple-match.h  (working copy)
@@ -44,6 +44,7 @@ bool gimple_simplify (gimple, code_helpe
  tree (*)(tree));
 tree maybe_push_res_to_seq (code_helper, tree, tree *,
gimple_seq *, tree res = NULL_TREE);
+void maybe_build_generic_op (enum tree_code, tree, tree *, tree, tree);
 
 
 #endif  /* GCC_GIMPLE_MATCH_H */
Index: gcc/gimple-fold.c
===
--- gcc/gimple-fold.c   (revision 214795)
+++ gcc/gimple-fold.c   (working copy)
@@ -2904,6 +2904,9 @@ fold_stmt_1 (gimple_stmt_iterator *gsi,
TREE_CODE (ops[2]) == SSA_NAME
SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2]
{
+ maybe_build_generic_op (rcode,
+ TREE_TYPE (gimple_assign_lhs (stmt)),
+ ops[0], ops[1], ops[2]);
  gimple_assign_set_rhs_with_ops_1 (gsi, rcode,
ops[0], ops[1], ops[2]);
  if (dump_file  (dump_flags  TDF_DETAILS))


Add missing Broadwell intrinsics.

2014-09-02 Thread Ilya Tocar
Hi,

Along with intrinsics for adcx/adox (supported since 4.8) ICC also
added intrinsics for adc/sbb [1]. This patch adds them.
Bootstraps/passes make-check. Ok for trunk?

[1] 
http://www.xlsoft.com/jp/products/intel/compilers/ccm/2013/Release_Notes_u3.pdf

ChangeLog below:

gcc/

2014-09-02  Ilya Tocar  ilya.to...@intel.com

* config/i386/adxintrin.h (_subborrow_u32): New.
(_addcarry_u32): Ditto.
(_subborrow_u64): Ditto.
(_addcarry_u64): Ditto.
* config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_SBB32,
IX86_BUILTIN_SBB64.
(ix86_init_mmx_sse_builtins): Handle __builtin_ia32_sbb_u32,
__builtin_ia32_sbb_u64


testsuite/

2014-09-02  Ilya Tocar  ilya.to...@intel.com

* gcc.target/i386/adx-addcarryx32-1.c: Test addcarry, subborrow.
* gcc.target/i386/adx-addcarryx32-2.c: Ditto.
* gcc.target/i386/adx-addcarryx32-3.c: Ditto.
* gcc.target/i386/adx-addcarryx64-1.c: Ditto.
* gcc.target/i386/adx-addcarryx64-2.c: Ditto.
* gcc.target/i386/adx-addcarryx64-3.c: Ditto.

---
 gcc/config/i386/adxintrin.h   | 32 +++
 gcc/config/i386/i386.c| 22 
 gcc/testsuite/gcc.target/i386/adx-addcarryx32-1.c |  5 +++-
 gcc/testsuite/gcc.target/i386/adx-addcarryx32-2.c | 27 +++
 gcc/testsuite/gcc.target/i386/adx-addcarryx32-3.c |  5 +++-
 gcc/testsuite/gcc.target/i386/adx-addcarryx64-1.c |  5 +++-
 gcc/testsuite/gcc.target/i386/adx-addcarryx64-2.c | 27 +++
 gcc/testsuite/gcc.target/i386/adx-addcarryx64-3.c |  5 +++-
 8 files changed, 124 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h
index 6118900..8f2c01a 100644
--- a/gcc/config/i386/adxintrin.h
+++ b/gcc/config/i386/adxintrin.h
@@ -30,6 +30,22 @@
 
 extern __inline unsigned char
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_subborrow_u32 (unsigned char __CF, unsigned int __X,
+   unsigned int __Y, unsigned int *__P)
+{
+return __builtin_ia32_sbb_u32 (__CF, __Y, __X, __P);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarry_u32 (unsigned char __CF, unsigned int __X,
+  unsigned int __Y, unsigned int *__P)
+{
+return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _addcarryx_u32 (unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P)
 {
@@ -39,6 +55,22 @@ _addcarryx_u32 (unsigned char __CF, unsigned int __X,
 #ifdef __x86_64__
 extern __inline unsigned char
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_subborrow_u64 (unsigned char __CF, unsigned long __X,
+   unsigned long __Y, unsigned long long *__P)
+{
+return __builtin_ia32_sbb_u64 (__CF, __Y, __X, __P);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_addcarry_u64 (unsigned char __CF, unsigned long __X,
+  unsigned long __Y, unsigned long long *__P)
+{
+return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _addcarryx_u64 (unsigned char __CF, unsigned long __X,
unsigned long __Y, unsigned long long *__P)
 {
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e4c93e..91b5d06 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28778,6 +28778,10 @@ enum ix86_builtins
   IX86_BUILTIN_ADDCARRYX32,
   IX86_BUILTIN_ADDCARRYX64,
 
+  /* ADC/SBB instructions.  */
+  IX86_BUILTIN_SBB32,
+  IX86_BUILTIN_SBB64,
+
   /* FSGSBASE instructions.  */
   IX86_BUILTIN_RDFSBASE32,
   IX86_BUILTIN_RDFSBASE64,
@@ -31213,6 +31217,14 @@ ix86_init_mmx_sse_builtins (void)
   UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
   IX86_BUILTIN_ADDCARRYX64);
 
+  /* ADX/SBB */
+  def_builtin (0, __builtin_ia32_sbb_u32,
+  UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
+  def_builtin (OPTION_MASK_ISA_64BIT,
+  __builtin_ia32_sbb_u64,
+  UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
+  IX86_BUILTIN_SBB64);
+
   /* Read/write FLAGS.  */
   def_builtin (~OPTION_MASK_ISA_64BIT, __builtin_ia32_readeflags_u32,
UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
@@ -35617,6 +35629,16 @@ rdseed_step:
   emit_insn (gen_zero_extendqisi2 (target, op2));
   return target;
 
+case IX86_BUILTIN_SBB32:
+  icode = CODE_FOR_subsi3_carry;
+  mode0 = SImode;
+  goto addcarryx;
+
+case IX86_BUILTIN_SBB64:
+  icode = CODE_FOR_subdi3_carry;
+  mode0 = DImode;
+  goto addcarryx;
+
 case IX86_BUILTIN_ADDCARRYX32:
   icode = TARGET_ADX ? 

Re: [PATCH][0/7][ARM] Convert VFP mnemonics to UAL

2014-09-02 Thread Kyrill Tkachov

Ping on this series?

Thanks,
Kyrill

On 19/08/14 16:04, Kyrill Tkachov wrote:

Hi all,

This patch series converts the arm backend to output unified assembly
syntax for the VFP instructions.
This makes it more readable since most UAL mnemonics also include
various type suffixes such as .f32 and .f64 that make it quick to
identify the data types being operated on.

Each patch is independent of the rest and can be applied in any order.

Bootstrapped and tested on arm-none-linux-gnueabihf with gas from
binutils 2.22 and newer.
Compiled various floating point benchmarks to make sure the binaries are
identical.

Ok for trunk?

Thanks,
Kyrill

   gcc/config/arm/arm-protos.h  |2 +-
   gcc/config/arm/arm.c |   12 -
   gcc/config/arm/arm.md|2 +-
   gcc/config/arm/vfp.md|   91
++
   gcc/testsuite/gcc.target/arm/vfp-1.c |   68 -
   5 files changed, 93 insertions(+), 82 deletions(-)








[FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)

2014-09-02 Thread Marek Polacek
(Now for the real fix.)
This patch fixes the last two spots where -Wlogical-not-parentheses
warns.  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3
if you want more info about the changes.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2014-09-02  Marek Polacek  pola...@redhat.com

PR fortran/62270
* interface.c (compare_parameter): Fix condition.
* trans-expr.c (gfc_conv_procedure_call): Likewise.

* gfortran.dg/pointer_intent_7.f90: Adjust dg-error.

diff --git gcc/fortran/interface.c gcc/fortran/interface.c
index b210d18..f6233b7 100644
--- gcc/fortran/interface.c
+++ gcc/fortran/interface.c
@@ -2014,7 +2014,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual,
   if (formal-ts.type == BT_CLASS  formal-attr.class_ok
actual-expr_type != EXPR_NULL
((CLASS_DATA (formal)-attr.class_pointer
-   !formal-attr.intent == INTENT_IN)
+   formal-attr.intent != INTENT_IN)
   || CLASS_DATA (formal)-attr.allocatable))
 {
   if (actual-ts.type != BT_CLASS)
diff --git gcc/fortran/trans-expr.c gcc/fortran/trans-expr.c
index f2ed474..4c057ee 100644
--- gcc/fortran/trans-expr.c
+++ gcc/fortran/trans-expr.c
@@ -4589,7 +4589,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
   e-expr_type == EXPR_VARIABLE
   (!e-ref
  || (e-ref-type == REF_ARRAY
-  !e-ref-u.ar.type != AR_FULL))
+  e-ref-u.ar.type != AR_FULL))
   e-symtree-n.sym-attr.optional)
{
  tmp = fold_build3_loc (input_location, COND_EXPR,
diff --git gcc/testsuite/gfortran.dg/pointer_intent_7.f90 
gcc/testsuite/gfortran.dg/pointer_intent_7.f90
index c09eb2b..5387ace 100644
--- gcc/testsuite/gfortran.dg/pointer_intent_7.f90
+++ gcc/testsuite/gfortran.dg/pointer_intent_7.f90
@@ -23,7 +23,7 @@ contains
 call bar2 (c)
 call bar3 (c)
 call bar2p (b) ! { dg-error INTENT\\(IN\\) in pointer association context 
\\(actual argument to INTENT = OUT/INOUT }
-call bar3p (b) ! { dg-error INTENT\\(IN\\) in pointer association context 
\\(actual argument to INTENT = OUT/INOUT }
+call bar3p (b) ! { dg-error Actual argument to .n. at \\(1\\) must be 
polymorphic }
 call bar2p (c) ! { dg-error INTENT\\(IN\\) in pointer association context 
\\(actual argument to INTENT = OUT/INOUT }
 call bar3p (c) ! { dg-error INTENT\\(IN\\) in pointer association context 
\\(actual argument to INTENT = OUT/INOUT }
   end subroutine

Marek


Re: Add missing Broadwell intrinsics.

2014-09-02 Thread Uros Bizjak
On Tue, Sep 2, 2014 at 2:36 PM, Ilya Tocar tocarip.in...@gmail.com wrote:
 Hi,

 Along with intrinsics for adcx/adox (supported since 4.8) ICC also
 added intrinsics for adc/sbb [1]. This patch adds them.
 Bootstraps/passes make-check. Ok for trunk?

 [1] 
 http://www.xlsoft.com/jp/products/intel/compilers/ccm/2013/Release_Notes_u3.pdf

 ChangeLog below:

 gcc/

 2014-09-02  Ilya Tocar  ilya.to...@intel.com

 * config/i386/adxintrin.h (_subborrow_u32): New.
 (_addcarry_u32): Ditto.
 (_subborrow_u64): Ditto.
 (_addcarry_u64): Ditto.
 * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_SBB32,
 IX86_BUILTIN_SBB64.
 (ix86_init_mmx_sse_builtins): Handle __builtin_ia32_sbb_u32,
 __builtin_ia32_sbb_u64

 testsuite/

 2014-09-02  Ilya Tocar  ilya.to...@intel.com

 * gcc.target/i386/adx-addcarryx32-1.c: Test addcarry, subborrow.
 * gcc.target/i386/adx-addcarryx32-2.c: Ditto.
 * gcc.target/i386/adx-addcarryx32-3.c: Ditto.
 * gcc.target/i386/adx-addcarryx64-1.c: Ditto.
 * gcc.target/i386/adx-addcarryx64-2.c: Ditto.
 * gcc.target/i386/adx-addcarryx64-3.c: Ditto.

OK with two comment changes below.

Thanks,
Uros.

 +  /* ADC/SBB instructions.  */

Just SBB instruction.

 +  IX86_BUILTIN_SBB32,
 +  IX86_BUILTIN_SBB64,
 +
/* FSGSBASE instructions.  */
IX86_BUILTIN_RDFSBASE32,
IX86_BUILTIN_RDFSBASE64,
 @@ -31213,6 +31217,14 @@ ix86_init_mmx_sse_builtins (void)
UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
IX86_BUILTIN_ADDCARRYX64);

 +  /* ADX/SBB */

Also here, these builtins implement just SBB instruction.

 +  def_builtin (0, __builtin_ia32_sbb_u32,
 +  UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
 +  def_builtin (OPTION_MASK_ISA_64BIT,
 +  __builtin_ia32_sbb_u64,
 +  UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
 +  IX86_BUILTIN_SBB64);
 +


Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass

2014-09-02 Thread Segher Boessenkool
On Tue, Sep 02, 2014 at 02:10:32PM +0200, Ulrich Weigand wrote:
 In any case, this test in can_combine_p rejects a combination for *two*
 different issues.  One is the earlyclobber problem, which is what that
 2004 thread was about, and which my patch back then relaxed for fixed
 hard register.
 
 However, this doesn't seem to apply to the example above; that is really
 about the second problem: don't substitute into a clobber.

Right.

 I understand the reason why this particular substitution is rejected is
 simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1)
 into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is
 invalid RTL.

I checked, and that is indeed what combine does.  How silly.

 Now I guess this check could be relaxed if somewhere else in combine we'd
 recognize the substitution into a clobber and simply omit it in that case.

Yeah.

In the testcase, combine tries combining 76,77 (77 is that clobbering
insn) and refuses it; then it tries 32,76,77 and refuses it; and then
it tries 32,76,77,43 and allows it (it doesn't do this check at all,
77 is not i3, combine omits the clobber completely).  Which is inconsistent.

What a mess.  Thanks for looking!


Segher


[C PATCH] Backport a fix for PR62294 to 4.9

2014-09-02 Thread Marek Polacek
PR62294 reports that 4.9 does not emit an incompatible pointer type
warning in a certain scenario.  I unknowingly broke this in r207335, and
then fixed it in r210980, which is a follow-up to the former.  But 4.9
doesn't have the latter.  This patch is basically a backport of r210980,
only without the traditional conversion stuff.

Bootstrapped/regtested on x86_64-linux, ok for 4.9?

2014-09-02  Marek Polacek  pola...@redhat.com

PR c/62294
* c-typeck.c (convert_arguments): Get location of a parameter.  Change
error and warning calls to error_at and warning_at.  Pass location of
a parameter to it.
(convert_for_assignment): Add parameter to WARN_FOR_ASSIGNMENT and
WARN_FOR_QUALIFIERS.  Pass expr_loc to those.

* gcc.dg/pr56724-1.c: New test.
* gcc.dg/pr56724-2.c: New test.

diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c
index 5838d6a..d096ad4 100644
--- gcc/c/c-typeck.c
+++ gcc/c/c-typeck.c
@@ -3071,6 +3071,12 @@ convert_arguments (location_t loc, veclocation_t 
arg_loc, tree typelist,
   bool excess_precision = false;
   bool npc;
   tree parmval;
+  /* Some __atomic_* builtins have additional hidden argument at
+position 0.  */
+  location_t ploc
+   = !arg_loc.is_empty ()  values-length () == arg_loc.length ()
+ ? expansion_point_location_if_in_system_header (arg_loc[parmnum])
+ : input_location;
 
   if (type == void_type_node)
{
@@ -3113,7 +3119,8 @@ convert_arguments (location_t loc, veclocation_t 
arg_loc, tree typelist,
 
  if (type == error_mark_node || !COMPLETE_TYPE_P (type))
{
- error (type of formal parameter %d is incomplete, parmnum + 1);
+ error_at (ploc, type of formal parameter %d is incomplete,
+   parmnum + 1);
  parmval = val;
}
  else
@@ -3128,34 +3135,34 @@ convert_arguments (location_t loc, veclocation_t 
arg_loc, tree typelist,
 
  if (INTEGRAL_TYPE_P (type)
   TREE_CODE (valtype) == REAL_TYPE)
-   warning (0, passing argument %d of %qE as integer 
-rather than floating due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   integer rather than floating due to 
+   prototype, argnum, rname);
  if (INTEGRAL_TYPE_P (type)
   TREE_CODE (valtype) == COMPLEX_TYPE)
-   warning (0, passing argument %d of %qE as integer 
-rather than complex due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   integer rather than complex due to 
+   prototype, argnum, rname);
  else if (TREE_CODE (type) == COMPLEX_TYPE
TREE_CODE (valtype) == REAL_TYPE)
-   warning (0, passing argument %d of %qE as complex 
-rather than floating due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   complex rather than floating due to 
+   prototype, argnum, rname);
  else if (TREE_CODE (type) == REAL_TYPE
INTEGRAL_TYPE_P (valtype))
-   warning (0, passing argument %d of %qE as floating 
-rather than integer due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   floating rather than integer due to 
+   prototype, argnum, rname);
  else if (TREE_CODE (type) == COMPLEX_TYPE
INTEGRAL_TYPE_P (valtype))
-   warning (0, passing argument %d of %qE as complex 
-rather than integer due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   complex rather than integer due to 
+   prototype, argnum, rname);
  else if (TREE_CODE (type) == REAL_TYPE
TREE_CODE (valtype) == COMPLEX_TYPE)
-   warning (0, passing argument %d of %qE as floating 
-rather than complex due to prototype,
-argnum, rname);
+   warning_at (ploc, 0, passing argument %d of %qE as 
+   floating rather than complex due to 
+   prototype, argnum, rname);
  /* 

[PATCH] PRE TLC

2014-09-02 Thread Richard Biener

The following patch removes dead code (blocks are never deferred
because we iterate in a proper CFG order now) and avoids building
up the el_avail vector one element at a time.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2014-09-02  Richard Biener  rguent...@suse.de

* tree-ssa-pre.c (alloc_expression_id): Use quick_grow_cleared.
(struct bb_bitmap_sets): Remove deferred member.
(BB_DEFERRED): Remove.
(defer_or_phi_translate_block): Remove.
(compute_antic_aux): Remove deferring of blocks, assert
proper iteration order.
(compute_antic): Do not set BB_DEFERRED.
(eliminate): Allocate el_avail of proper size initially.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c.orig 2014-09-02 16:01:08.733146617 +0200
+++ gcc/tree-ssa-pre.c  2014-09-02 15:56:23.687166242 +0200
@@ -272,11 +272,10 @@ alloc_expression_id (pre_expr expr)
 {
   unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
   /* vec::safe_grow_cleared allocates no headroom.  Avoid frequent
-re-allocations by using vec::reserve upfront.  There is no
-vec::quick_grow_cleared unfortunately.  */
+re-allocations by using vec::reserve upfront.  */
   unsigned old_len = name_to_id.length ();
   name_to_id.reserve (num_ssa_names - old_len);
-  name_to_id.safe_grow_cleared (num_ssa_names);
+  name_to_id.quick_grow_cleared (num_ssa_names);
   gcc_assert (name_to_id[version] == 0);
   name_to_id[version] = expr-id;
 }
@@ -427,10 +426,6 @@ typedef struct bb_bitmap_sets
   /* True if we have visited this block during ANTIC calculation.  */
   unsigned int visited : 1;
 
-  /* True we have deferred processing this block during ANTIC
- calculation until its successor is processed.  */
-  unsigned int deferred : 1;
-
   /* True when the block contains a call that might not return.  */
   unsigned int contains_may_not_return_call : 1;
 } *bb_value_sets_t;
@@ -444,7 +439,6 @@ typedef struct bb_bitmap_sets
 #define NEW_SETS(BB)   ((bb_value_sets_t) ((BB)-aux))-new_sets
 #define EXPR_DIES(BB)  ((bb_value_sets_t) ((BB)-aux))-expr_dies
 #define BB_VISITED(BB) ((bb_value_sets_t) ((BB)-aux))-visited
-#define BB_DEFERRED(BB) ((bb_value_sets_t) ((BB)-aux))-deferred
 #define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) 
((BB)-aux))-contains_may_not_return_call
 
 
@@ -2085,26 +2079,6 @@ static sbitmap has_abnormal_preds;
 
 static sbitmap changed_blocks;
 
-/* Decide whether to defer a block for a later iteration, or PHI
-   translate SOURCE to DEST using phis in PHIBLOCK.  Return false if we
-   should defer the block, and true if we processed it.  */
-
-static bool
-defer_or_phi_translate_block (bitmap_set_t dest, bitmap_set_t source,
- basic_block block, basic_block phiblock)
-{
-  if (!BB_VISITED (phiblock))
-{
-  bitmap_set_bit (changed_blocks, block-index);
-  BB_VISITED (block) = 0;
-  BB_DEFERRED (block) = 1;
-  return false;
-}
-  else
-phi_translate_set (dest, source, block, phiblock);
-  return true;
-}
-
 /* Compute the ANTIC set for BLOCK.
 
If succs(BLOCK)  1 then
@@ -2144,30 +2118,8 @@ compute_antic_aux (basic_block block, bo
   else if (single_succ_p (block))
 {
   basic_block succ_bb = single_succ (block);
-
-  /* We trade iterations of the dataflow equations for having to
-phi translate the maximal set, which is incredibly slow
-(since the maximal set often has 300+ members, even when you
-have a small number of blocks).
-Basically, we defer the computation of ANTIC for this block
-until we have processed it's successor, which will inevitably
-have a *much* smaller set of values to phi translate once
-clean has been run on it.
-The cost of doing this is that we technically perform more
-iterations, however, they are lower cost iterations.
-
-Timings for PRE on tramp3d-v4:
-without maximal set fix: 11 seconds
-with maximal set fix/without deferring: 26 seconds
-with maximal set fix/with deferring: 11 seconds
- */
-
-  if (!defer_or_phi_translate_block (ANTIC_OUT, ANTIC_IN (succ_bb),
-   block, succ_bb))
-   {
- changed = true;
- goto maybe_dump_sets;
-   }
+  gcc_assert (BB_VISITED (succ_bb));
+  phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), block, succ_bb);
 }
   /* If we have multiple successors, we take the intersection of all of
  them.  Note that in the case of loop exit phi nodes, we may have
@@ -2187,20 +2139,11 @@ compute_antic_aux (basic_block block, bo
worklist.quick_push (e-dest);
}
 
-  /* Of multiple successors we have to have visited one already.  */
-  if (!first)
-   {
- bitmap_set_bit (changed_blocks, 

[PATCH][match-and-simplify] Add comparison patterns

2014-09-02 Thread Richard Biener

The following patch adds more comparison patterns (with comments
on what is missing still).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2014-09-02  Richard Biener  rguent...@suse.de

* fold-const.h (negate_expr_p): Declare.
* fold-const.c (negate_expr_p): Export.
* match-comparison.pd: Implement more comparison patterns.

Index: gcc/match-comparison.pd
===
*** gcc/match-comparison.pd.orig2014-09-02 11:10:06.855348847 +0200
--- gcc/match-comparison.pd 2014-09-02 13:30:52.392767381 +0200
***
*** 1,3 
--- 1,62 
+ /* From fold_binary.  */
+ 
+ (simplify
+  (ne @0 integer_zerop@1)
+  (if (TREE_CODE (TREE_TYPE (@0)) == BOOLEAN_TYPE)
+   /* ???  In GENERIC the type of the comparison may be 'int'.  */
+   (convert @0)))
+ 
+ /* Distribute operations in equality compares.  */
+ (for op in eq ne
+  /* -exp op CST is exp op -CST.  */
+  (simplify
+   (op (negate @0) INTEGER_CST@1)
+   /* ??? fix fold-const to use negate_expr_p  */
+   (if (negate_expr_p (@1))
+(op @0 (negate @1
+  /* X ^ C1 == C2 is X == (C1 ^ C2).  */
+  (simplify
+   (op (bit_xor @0 INTEGER_CST@1) INTEGER_CST@2)
+   (op @0 (bit_xor @1 @2
+ 
+ /* From fold_comparison, in the order of transforms in there.  */
+ 
+ /* Transform comparisons of the form X +- C1 CMP C2 to X CMP C2 -+ C1.  */
+ (for cmp in lt le eq ge gt ne
+  (for op in plus minus
+   (simplify
+(cmp (op @0 INTEGER_CST@1) INTEGER_CST@2)
+(if ((cmp == NE_EXPR || cmp == EQ_EXPR
+|| TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)))
+(@3 = int_const_binop (op == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, 
@2, @1))
+   /* ???  fold_comparison here does, when @1 and @2 didn't have
+  TREE_OVERFLOW set, simplify the comparison to true/false
+  by using a staturated add.  */
+!TREE_OVERFLOW (@3))
+ (cmp @0 @3)
+ 
+ /* Transform comparisons of the form X - Y CMP 0 to X CMP Y.  */
+ /* ??? The transformation is valid for the other operators if overflow
+is undefined for the type, but performing it here badly interacts
+with the transformation in fold_cond_expr_with_comparison which
+attempts to synthetize ABS_EXPR.  */
+ (for cmp in eq ne
+  (simplify
+   (cmp (minus @0 @1) integer_zerop)
+   (cmp @0 @1)))
+ 
+ /* For comparisons of pointers we can decompose it to a compile time
+comparison of the base objects and the offsets into the object.
+This requires at least one operand being an ADDR_EXPR or a
+POINTER_PLUS_EXPR to do more than the operand_equal_p test below.  */
+ #if 0
+ (for cmp in lt le eq ge gt ne
+  (for op in addr pointer_plus
+   (simplify
+(cmp:c (op@0 @1 @2) @3)
+(if (simplify_addr_comparison (@0, @3, @@))
+ #endif
+ 
  /* Simplify X * C1 CMP 0 to X CMP 0 if C1 is not zero.  */
  (for op in lt le eq ne ge gt
(simplify
***
*** 13,15 
--- 72,141 
   (if (tree_int_cst_sgn (@1)  0)
(op @2 @0)
  
+ #if 0
+ /* If this is comparing a constant with a MIN_EXPR or a MAX_EXPR of a
+constant, we can simplify it.  */
+ (for op in min max
+  (for cmp in eq gt
+   (cmp (op @0 INTEGER_CST@1) INTEGER_CST@2)
+   (if (op == MAX_EXPR  tree_int_cst_compare (@1, @2) == 0)
+(le @0 @2))
+   (if (
+   )
+/* ??? optimize_minmax_comparison handles ne, lt and le by
+   recursing with an inverted comparison and then inverting
+   the result.  Or combining equality and gt with truth_or.   */)
+ #endif
+ 
+ /* Simplify comparison of something with itself.  For IEEE
+floating-point, we can only do some of these simplifications.  */
+ (for cmp in ge le
+  (simplify
+   (cmp @0 @0)
+   (eq @0 @0)))
+ (simplify
+  (eq @0 @0)
+  (if (! FLOAT_TYPE_P (TREE_TYPE (@0))
+   || ! HONOR_NANS (TYPE_MODE (TREE_TYPE (@0
+   { constant_boolean_node (true, type); }))
+ (for cmp in ne gt lt
+  (simplify
+   (cmp @0 @0)
+   (if (cmp != NE_EXPR
+|| ! FLOAT_TYPE_P (TREE_TYPE (@0))
+|| ! HONOR_NANS (TYPE_MODE (TREE_TYPE (@0
+{ constant_boolean_node (false, type); })))
+ 
+ /* Need to split up the cases in twoval_comparison_p.  */
+ 
+ #if 0
+ /* We can fold X/C1 op C2 where C1 and C2 are integer constants
+into a single range test.  */
+ (for cmp in lt le eq ge gt ne
+  (for div in trunc_div exact_div
+   (simplify
+(cmp (div @0 INTEGER_CST@1) INTEGER_CST)
+(if (!integer_zerop (@1))
+/* ???  Need to think about what fold_div_compare does.  IMHO
+   we can unconditionally build a
+   (unsigned)@0 +- CST = CST'
+   range check.  */
+   
+ #endif
+ 
+ /* Fold ~X op ~Y as Y op X.  */
+ (for cmp in lt le eq ge gt ne
+  (simplify
+   (cmp (bit_not @0) (bit_not @1))
+   (cmp @1 @0)))
+ 
+ /* Fold ~X op C as X op' ~C, where op' is the swapped comparison.  */
+ (for cmp in lt le eq ge gt ne
+  (simplify
+   (cmp (bit_not @0) @1)
+   /* ???  (for cst in INTEGER_CST 

Re: [C++ Patch] DR 1453

2014-09-02 Thread Jason Merrill

OK.

Jason


Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Paolo Carlini

Hi,

On 09/02/2014 04:11 PM, Jason Merrill wrote:

On 09/01/2014 09:47 AM, Paolo Carlini wrote:

-constexpr A b = a;// { dg-error mutable }
+constexpr A b = a;


This is wrong; we still need to get an error here.
Hum, interesting. Neither current EDG nor current clang error out there. 
Let's see if I can tease the case out...


Thanks,
Paolo.


Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Jason Merrill

On 09/01/2014 09:47 AM, Paolo Carlini wrote:

-constexpr A b = a; // { dg-error mutable }
+constexpr A b = a;


This is wrong; we still need to get an error here.

Jason



Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Jason Merrill

On 09/02/2014 10:17 AM, Paolo Carlini wrote:

Let's see if I can tease the case out...


I think you need to leave that hunk alone, and instead fix the new 
testcase by treating = {} more like {}, just as we already don't require 
a copy constructor call for copy-list-initialization.


Jason



Re: Enable EBX for x86 in 32bits PIC code

2014-09-02 Thread Vladimir Makarov
On 08/29/2014 02:47 AM, Ilya Enkovich wrote:
 Seems your patch doesn't cover all cases.  Attached is a modified
 patch (with your changes included) and a test where double constant is
 wrongly rematerialized.  I also see in ira dump that there is still a
 copy of PIC reg created:

 Initialization of original PIC reg:
 (insn 23 22 24 2 (set (reg:SI 127)
 (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal}
  (expr_list:REG_DEAD (reg:SI 3 bx)
 (nil)))
 ...
 Copy is created:
 (insn 135 37 25 3 (set (reg:SI 138 [127])
 (reg:SI 127)) 90 {*movsi_internal}
  (expr_list:REG_DEAD (reg:SI 127)
 (nil)))
 ...
 Copy is used:
 (insn 119 25 122 3 (set (reg:DF 134)
 (mem/u/c:DF (plus:SI (reg:SI 138 [127])
 (const:SI (unspec:SI [
 (symbol_ref/u:SI (*.LC0) [flags 0x2])
 ] UNSPEC_GOTOFF))) [5  S8 A64])) 128 {*movdf_internal}
  (expr_list:REG_EQUIV (const_double:DF
 2.9997371893933895137251965934410691261292e-4
 [0x0.9d495182a99308p-11])
 (nil)))

 After reload we have new usage of r127 which is allocated to ecx which
 actually does not have any definition in this function at all.

 (insn 151 42 44 4 (set (reg:SI 0 ax [147])
 (plus:SI (reg:SI 2 cx [127])
 (const:SI (unspec:SI [
 (symbol_ref/u:SI (*.LC0) [flags 0x2])
 ] UNSPEC_GOTOFF test.cc:44 213 {*leasi}
  (expr_list:REG_EQUAL (symbol_ref/u:SI (*.LC0) [flags 0x2])
 (nil)))
 (insn 44 151 45 4 (set (reg:DF 21 xmm0 [orig:129 D.2450 ] [129])
 (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
 (mem/u/c:DF (reg:SI 0 ax [147]) [5  S8 A64]))) test.cc:44
 790 {*fop_df_comm_sse}
  (expr_list:REG_EQUAL (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128])
 (const_double:DF
 2.9997371893933895137251965934410691261292e-4
 [0x0.9d495182a99308p-11]))
 (nil)))

 Compilation string: g++ -m32 -O2 -mfpmath=sse -fPIE -S test.cc


Ok, Ilya.  I'll look at the problem this week.



Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Paolo Carlini

Hi,

On 09/02/2014 04:28 PM, Jason Merrill wrote:

On 09/02/2014 10:17 AM, Paolo Carlini wrote:

Let's see if I can tease the case out...


I think you need to leave that hunk alone, and instead fix the new 
testcase by treating = {} more like {}, just as we already don't 
require a copy constructor call for copy-list-initialization.

I see. Thanks a lot for the tip!

Paolo.


Re: [PATCH AArch64] Rename [u]int32x1_t to [u]int32_t (resp 16x1, 8x1) in arm_neon.h

2014-09-02 Thread Marcus Shawcroft
On 24 July 2014 11:18, Alan Lawrence alan.lawre...@arm.com wrote:
 The ACLE spec does not mention the int32x1_t, uint32x1_t, int16x1_t,
 uint16x1_t, int8x1_t or uint8x1_t types currently in arm_neon.h, but just
 'standard' types int32_t, int16_t, etc. This patch is a global
 search-and-replace across arm_neon.h (and the tests that depend on it).

 Regressed (check-gcc and check-g++) on aarch64-none-elf.


OK for trunk.

 The question of backporting to 4.9 has been raised internally. There is no
 ABI issue, as int32x1_t was merely a typedef to int32_t (etc.). However
 there is a source code compatibility issue; code mentioning the 32x1 types,
 i.e. not conforming to the ACLE spec, which previously compiled, will no
 longer do so. My personal feeling is therefore not to backport this, but I
 would welcome input from maintainers (and others)...?

I doubt that there is currently much code out there that will be
affected by this change, and I think it would be better to back port and
hence limit the amount of code written against the broken arm_neon.h
during the life of the 4.9.x series. If there are no objections to
back porting in the next couple of days then go ahead.

/Marcus


Re: [C/C++ PATCH] Allow __atomic_always_lock_free in a static assert (PR c/62024)

2014-09-02 Thread Marek Polacek
On Wed, Aug 27, 2014 at 03:06:38PM -0400, Jason Merrill wrote:
 On 08/25/2014 07:43 AM, Marek Polacek wrote:
  * semantics.c (finish_static_assert): Strip no-op conversions.
 
 I think I'd rather strip these in cxx_eval_builtin_function_call so that we
 don't have to deal with them in various consumers.

I was playing with this again today and I've found out that I actually
don't need to touch C++ FE at all; maybe_constant_value returns
integer_cst in this case.  Don't know how I flubbed that.

Given that the C part is approved, I'm going to commit the following.

Bootstrapped/regtested on x86_64-linux.

2014-09-02  Marek Polacek  pola...@redhat.com

PR c/62024
* c-parser.c (c_parser_static_assert_declaration_no_semi): Strip no-op
conversions.

* g++.dg/cpp0x/pr62024.C: New test.
* gcc.dg/pr62024.c: New test.

diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index d634bb1..fc7bbaf 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -2058,6 +2058,8 @@ c_parser_static_assert_declaration_no_semi (c_parser 
*parser)
   if (TREE_CODE (value) != INTEGER_CST)
 {
   value = c_fully_fold (value, false, NULL);
+  /* Strip no-op conversions.  */
+  STRIP_TYPE_NOPS (value);
   if (TREE_CODE (value) == INTEGER_CST)
pedwarn (value_loc, OPT_Wpedantic, expression in static assertion 
 is not an integer constant expression);
diff --git gcc/testsuite/g++.dg/cpp0x/pr62024.C 
gcc/testsuite/g++.dg/cpp0x/pr62024.C
index e69de29..5f0640a 100644
--- gcc/testsuite/g++.dg/cpp0x/pr62024.C
+++ gcc/testsuite/g++.dg/cpp0x/pr62024.C
@@ -0,0 +1,7 @@
+// PR c/62024
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target sync_char_short }
+
+int *p;
+static_assert (__atomic_always_lock_free (1, p), );
+static_assert (__atomic_always_lock_free (1, 0), );
diff --git gcc/testsuite/gcc.dg/pr62024.c gcc/testsuite/gcc.dg/pr62024.c
index e69de29..79a0b79 100644
--- gcc/testsuite/gcc.dg/pr62024.c
+++ gcc/testsuite/gcc.dg/pr62024.c
@@ -0,0 +1,8 @@
+/* PR c/62024 */
+/* { dg-do compile } */
+/* { dg-options -std=gnu11 -Wpedantic } */
+/* { dg-require-effective-target sync_char_short } */
+
+int *p;
+_Static_assert (__atomic_always_lock_free (1, p), ); /* { dg-warning is not 
an integer constant } */
+_Static_assert (__atomic_always_lock_free (1, 0), ); /* { dg-warning is not 
an integer constant } */

Marek


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread David Malcolm
On Tue, 2014-09-02 at 00:03 -0700, Andi Kleen wrote:
 From: Andi Kleen a...@linux.intel.com
 
 I noticed that with the trunk compiler a range of the new rtl
 inlines show up as hot in a profiler during stage1. I think
 that happens because stage1 is not using optimization
 and does not inline plain inline.  And these rtl inlines
 are very frequently called.

Sorry about that.

FWIW I'm working on some followup patches for the rtx-classes work that
ought to eliminate some of the is_a_helper calls; I hope to post them
in the next few days. [1]

I suspect the bulk of them currently are coming from the
safe_as_a <rtx_insn *> calls within NEXT_INSN and PREV_INSN; do you happen
to have information handy on that?

Dave

[1] (I have to take the rest of today off for a family matter).



Re: [PATCH] Add -fno-instrument-function

2014-09-02 Thread Andi Kleen
 Hmm, why not make -no-pg (does that exist?) and/or -mno-fentry

I'm not sure.

 do this?  That is, I don't see the need for a new option.

That would be really odd behavior: a yes/no option whose default
is controlled by other object files' command line.
And -pg would be for all files in LTO, and no-pg only for that file,
so it would not be symmetric.

I think an explicit different option has far cleaner semantics for
now (at least until the LTO option mess can be properly cleaned up)

-Andi


Re: please verify my mail to community.

2014-09-02 Thread Marat Zakirov

Hi all!

Here's a simple optimization patch for Asan. It stores alignment
information into ASAN_CHECK which is then extracted by sanopt to reduce
the number of "and 0x7" instructions for sufficiently aligned accesses. I
checked it on the Linux kernel by comparing the results of "objdump -d -j
.text vmlinux | grep and.*0x7" for the optimized and regular cases. It
eliminates 12% of the "and 0x7" instructions.


No regressions. Sanitized GCC was successfully Asan-bootstrapped. No 
false positives were found in kernel.


--Marat

gcc/ChangeLog:

2014-09-02  Marat Zakirov  m.zaki...@samsung.com

	* asan.c (build_check_stmt): Alignment arg was added.
	(asan_expand_check_ifn): Optimization for alignment >= 8.

gcc/testsuite/ChangeLog:

2014-09-02  Marat Zakirov  m.zaki...@samsung.com

	* c-c++-common/asan/red-align-1.c: New test.
	* c-c++-common/asan/red-align-2.c: New test.

diff --git a/gcc/asan.c b/gcc/asan.c
index 58e7719..aed5ede 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len,
   if (end_instrumented)
 flags |= ASAN_CHECK_END_INSTRUMENTED;
 
-  g = gimple_build_call_internal (IFN_ASAN_CHECK, 3,
+  g = gimple_build_call_internal (IFN_ASAN_CHECK, 4,
   build_int_cst (integer_type_node, flags),
-  base, len);
+  base, len,
+  build_int_cst (integer_type_node,
+		 align/BITS_PER_UNIT));
   gimple_set_location (g, loc);
   if (before_p)
 gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -2434,6 +2436,7 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 
   tree base = gimple_call_arg (g, 1);
   tree len = gimple_call_arg (g, 2);
+  HOST_WIDE_INT align = tree_to_shwi (gimple_call_arg (g, 3));
 
   HOST_WIDE_INT size_in_bytes
 = is_scalar_access  tree_fits_shwi_p (len) ? tree_to_shwi (len) : -1;
@@ -2547,7 +2550,10 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 	  gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
 	  gimple_seq seq = NULL;
 	  gimple_seq_add_stmt (seq, shadow_test);
-	  gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, base_addr, 7));
+	  /* Aligned (= 8 bytes) access do not need  7.  */
+	  if (align  8)
+	gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR,
+		 base_addr, 7));
 	  gimple_seq_add_stmt (seq, build_type_cast (shadow_type,
 		  gimple_seq_last (seq)));
 	  if (real_size_in_bytes  1)
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7ae60f3..54ade9f 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -55,4 +55,4 @@ DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL)
 DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W..)
+DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W...)
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-1.c b/gcc/testsuite/c-c++-common/asan/red-align-1.c
new file mode 100644
index 000..1edb3a2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant  7.  */
+
+/* { dg-options -fdump-tree-sanopt } */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -flto } {  } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p-a;
+}
+
+/* { dg-final { scan-tree-dump-times  7 0 sanopt } } */
+/* { dg-final { cleanup-tree-dump sanopt } } */
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-2.c b/gcc/testsuite/c-c++-common/asan/red-align-2.c
new file mode 100644
index 000..161fe3c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant  7.  */
+
+/* { dg-options -fdump-tree-sanopt } */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -flto } {  } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p-b;
+}
+
+/* { dg-final { scan-tree-dump-times  7 1 sanopt } } */
+/* { dg-final { cleanup-tree-dump sanopt } } */


Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Paolo Carlini

Hi again,

On 09/02/2014 04:28 PM, Jason Merrill wrote:

On 09/02/2014 10:17 AM, Paolo Carlini wrote:

Let's see if I can tease the case out...


I think you need to leave that hunk alone, and instead fix the new 
testcase by treating = {} more like {}, just as we already don't 
require a copy constructor call for copy-list-initialization.
By the way, now I really understand the DR (the wording in the 
resolution clarifies what we are *already* doing correctly!).


Anyway, what about the below? Certainly works for the tests which we 
have got.


Thanks,
Paolo.



Index: cp/semantics.c
===
--- cp/semantics.c  (revision 214808)
+++ cp/semantics.c  (working copy)
@@ -9859,11 +9859,14 @@ cxx_eval_outermost_constant_expr (tree t, bool all
   verify_constant (r, allow_non_constant, non_constant_p, overflow_p);
 
   if (TREE_CODE (t) != CONSTRUCTOR
+   (TREE_CODE (t) != TARGET_EXPR
+ || TREE_CODE (TARGET_EXPR_INITIAL (t)) != AGGR_INIT_EXPR)
cp_has_mutable_p (TREE_TYPE (t)))
 {
   /* We allow a mutable type if the original expression was a
 CONSTRUCTOR so that we can do aggregate initialization of
-constexpr variables.  */
+constexpr variables.  Likewise for TARGET_EXPRs with an
+AGGR_INIT_EXPR as TARGET_EXPR_INITIAL (c++/58102).  */
   if (!allow_non_constant)
error (%qT cannot be the type of a complete constant expression 
   because it has mutable sub-objects, TREE_TYPE (t));
Index: testsuite/g++.dg/cpp0x/constexpr-mutable2.C
===
--- testsuite/g++.dg/cpp0x/constexpr-mutable2.C (revision 0)
+++ testsuite/g++.dg/cpp0x/constexpr-mutable2.C (working copy)
@@ -0,0 +1,10 @@
+// DR 1405, PR c++/58102
+// { dg-do compile { target c++11 } }
+
+struct S {
+  mutable int n;
+  constexpr S() : n() {}
+};
+
+constexpr S s1 {};
+constexpr S s2 = {};


Re: [PATCH AArch64 1/3] Don't disparage add/sub in SIMD registers

2014-09-02 Thread Marcus Shawcroft
On 18 August 2014 17:50, Alan Lawrence alan.lawre...@arm.com wrote:
 Well, you're right that it could be. So I presented the wrong justification.

 Clearly we would benefit from some better cost infrastructure here, ideally
 that is expressive, taken into account at all appropriate stages of the
 compiler, and tunable per core. I imagine that steps (patches) towards such
 infrastructure would be welcomed by both AArch64 maintainers and more
 widely.

 In the meantime, however, we must work with what we have. I'll still argue
 that we should remove the '!' (as per patch), however. As James has said,
 even if your add is more expensive in SIMD registers, the '!' still doesn't
 express that; and leaving it in affects code-generation on all cores. And it
 is inconsistent with other instructions.

Agreed and OK. /Marcus


[PATCH] Asan optimization for aligned accesses.

2014-09-02 Thread Marat Zakirov

Sorry for the wrong subject!

On 09/02/2014 07:03 PM, Marat Zakirov wrote:

Hi all!

Here's a simple optimization patch for Asan. It stores alignment
information into ASAN_CHECK which is then extracted by sanopt to
reduce the number of "and 0x7" instructions for sufficiently aligned
accesses. I checked it on the Linux kernel by comparing the results of
"objdump -d -j .text vmlinux | grep and.*0x7" for the optimized and
regular cases. It eliminates 12% of the "and 0x7" instructions.


No regressions. Sanitized GCC was successfully Asan-bootstrapped. No 
false positives were found in kernel.


--Marat



gcc/ChangeLog:

2014-09-02  Marat Zakirov  m.zaki...@samsung.com

	* asan.c (build_check_stmt): Alignment arg was added.
	(asan_expand_check_ifn): Optimization for alignment >= 8.

gcc/testsuite/ChangeLog:

2014-09-02  Marat Zakirov  m.zaki...@samsung.com

	* c-c++-common/asan/red-align-1.c: New test.
	* c-c++-common/asan/red-align-2.c: New test.

diff --git a/gcc/asan.c b/gcc/asan.c
index 58e7719..aed5ede 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len,
   if (end_instrumented)
 flags |= ASAN_CHECK_END_INSTRUMENTED;
 
-  g = gimple_build_call_internal (IFN_ASAN_CHECK, 3,
+  g = gimple_build_call_internal (IFN_ASAN_CHECK, 4,
   build_int_cst (integer_type_node, flags),
-  base, len);
+  base, len,
+  build_int_cst (integer_type_node,
+		 align/BITS_PER_UNIT));
   gimple_set_location (g, loc);
   if (before_p)
 gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -2434,6 +2436,7 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 
   tree base = gimple_call_arg (g, 1);
   tree len = gimple_call_arg (g, 2);
+  HOST_WIDE_INT align = tree_to_shwi (gimple_call_arg (g, 3));
 
   HOST_WIDE_INT size_in_bytes
 = is_scalar_access  tree_fits_shwi_p (len) ? tree_to_shwi (len) : -1;
@@ -2547,7 +2550,10 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 	  gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
 	  gimple_seq seq = NULL;
 	  gimple_seq_add_stmt (seq, shadow_test);
-	  gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, base_addr, 7));
+	  /* Aligned (= 8 bytes) access do not need  7.  */
+	  if (align  8)
+	gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR,
+		 base_addr, 7));
 	  gimple_seq_add_stmt (seq, build_type_cast (shadow_type,
 		  gimple_seq_last (seq)));
 	  if (real_size_in_bytes  1)
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 7ae60f3..54ade9f 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -55,4 +55,4 @@ DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL)
 DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W..)
+DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W...)
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-1.c b/gcc/testsuite/c-c++-common/asan/red-align-1.c
new file mode 100644
index 000..1edb3a2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant  7.  */
+
+/* { dg-options -fdump-tree-sanopt } */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -flto } {  } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p-a;
+}
+
+/* { dg-final { scan-tree-dump-times  7 0 sanopt } } */
+/* { dg-final { cleanup-tree-dump sanopt } } */
diff --git a/gcc/testsuite/c-c++-common/asan/red-align-2.c b/gcc/testsuite/c-c++-common/asan/red-align-2.c
new file mode 100644
index 000..161fe3c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c
@@ -0,0 +1,20 @@
+/* This tests aligment propagation to structure elem and
+   abcense of redudant  7.  */
+
+/* { dg-options -fdump-tree-sanopt } */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -flto } {  } } */
+
+struct st {
+  int a;
+  int b;
+  int c;
+} __attribute__((aligned(16)));
+
+int foo (struct st * s_p)
+{
+  return s_p-b;
+}
+
+/* { dg-final { scan-tree-dump-times  7 1 sanopt } } */
+/* { dg-final { cleanup-tree-dump sanopt } } */


Re: [PATCH AArch64 2/3] Add SIMD-reg variants of logical operators and/ior/xor/not

2014-09-02 Thread Marcus Shawcroft
On 12 August 2014 15:43, Alan Lawrence alan.lawre...@arm.com wrote:
 This patch adds SIMD register variants for and, ior, xor and not - similarly
 to add/sub, the H/W supports it, and it'll be more efficient if the values
 are there already, e.g. if passed as [u]int64x1_t parameters.

 gcc/ChangeLog:

 * config/aarch64/aarch64.md (optabmode3, one_cmplmode2):
 Add SIMD-register variant.
 * config/aarch64/iterators.md (Vbtype): Add value for SI.

OK /Marcus


Re: [PATCH AArch64 3/3] Fix XOR_one_cmpl pattern; add SIMD-reg variants for BIC,ORN,EON

2014-09-02 Thread Marcus Shawcroft
On 12 August 2014 15:55, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64.c (LOGICAL:optab_one_cmplmode3):
 Reparameterize to...
 (NLOGICAL:optab_one_cmplmode3): with extra SIMD-register
 variant.
 (xor_one_cmplmode3): New define_insn_and_split.

 * config/aarch64/iterators.md (NLOGICAL): New
 define_code_iterator.

 gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/eon_1.c: New test.

OK /Marcus


Re: [PATCH AArch64 1/2] Improve codegen of vector compares inc. tst instruction

2014-09-02 Thread Marcus Shawcroft
On 19 August 2014 11:44, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
 TYPES_TST): Define.
 (aarch64_fold_builtin): Update pattern for cmtst.

 * config/aarch64/aarch64-protos.h
 (aarch64_const_vec_all_same_int_p):
 Declare.

 * config/aarch64/aarch64-simd-builtins.def (cmtst): Update
 qualifiers.

 * config/aarch64/aarch64-simd.md
 (aarch64_vcond_internalmodemode):
 Switch operands, separate out more cases, refactor.

 (aarch64_cmtstmode): Rewrite pattern to match (plus ... -1).

 * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
 argument; rename old version to...
 (aarch64_const_vec_all_same_in_range_p): ...this.
 (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.

 * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.

 gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/simd/int_comparisons.x: New file.
 * gcc.target/aarch64/simd/int_comparisons_1.c: New test.
 * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.

OK /Marcus


Re: [PATCH AArch64 2/2] Remove vector compare/tst __builtins

2014-09-02 Thread Marcus Shawcroft
On 19 August 2014 14:43, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove
 code
 handling cmge, cmgt, cmeq, cmtst.

 * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
 cmlt, cmgeu, cmgtu, cmtst): Remove.

 * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
 vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
 vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
 vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.

OK /Marcus


[PATCH] aarch64: Enable Neon search_line_fast

2014-09-02 Thread Richard Henderson
Is it intentional or not that AArch64 does not define __ARM_NEON__?

Otherwise, here's a better way to fold the test bits.  AArch64 of
course does not have dN+1 overlap the high part of the qM register,
like AArch32, so the current 

  l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
  
implies extra register moves.  But on the good side, the armv8 ADDV
instruction allows two instructions to be removed from this fast path.

When built for 32-bit, the new form results in the same instruction
count; we simply keep using q registers instead of d registers
for two more insns.  Given that there are currently ifdefs involved,
it would certainly be possible to keep the 32-bit path unchanged, if 
that's thought to be valuable.

I did wonder if the armv8 stuff was supposed to be included in the
AArch32 arm_neon.h?  Is it just an oversight that it's missing?


r~


* lex.c (search_line_fast) [__ARM_NEON]: Use __FOO not __FOO__
to detect neon support.  Fold the comparison using ADDV when
available.


diff --git a/libcpp/lex.c b/libcpp/lex.c
index 5366dad..6d1823e 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -638,7 +638,7 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
   }
 }
 
-#elif defined (__ARM_NEON__)
+#elif defined (__ARM_NEON)
 #include arm_neon.h
 
 static const uchar *
@@ -649,6 +649,7 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
+  const int16x8_t shift = { 0, 0, 0, 0, 8, 8, 8, 8 };
 
   unsigned int misalign, found, mask;
   const uint8_t *p;
@@ -670,10 +671,8 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
 
   do
 {
-  uint8x8_t l;
-  uint16x4_t m;
-  uint32x2_t n;
   uint8x16_t t, u, v, w;
+  uint16x8_t l;
 
   p += 16;
   data = vld1q_u8 (p);
@@ -685,12 +684,24 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
   v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
   w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
   t = vandq_u8 (vorrq_u8 (v, w), xmask);
-  l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
-  m = vpaddl_u8 (l);
-  n = vpaddl_u16 (m);
-  
-  found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n, 
- vshr_n_u64 ((uint64x1_t) n, 24)), 0);
+
+  l = vpaddlq_u8 (t);
+  l = vshlq_u16 (l, shift);
+
+  /* ??? Ideally, this would be if (__ARM_ARCH = 8) since the ADDV insn
+reduces the instruction count by two.  But vaddvq is not present in
+the arm32 arm_neon.h, nor does AArch64 define __ARM_ARCH.  */
+#ifdef __aarch64__
+  found = vaddvq_u16 (l);
+#else
+  {
+   uint32x4_t m = vpaddlq_u16 (l);
+   uint64x2_t n = vpaddlq_u32 (m);
+   uint64x1_t o = vget_low_u64 (n) + vget_high_u64 (n);
+   found = vget_lane_u32 ((uint32x2_t)o, 0);
+  }
+#endif
+
   found = mask;
 }
   while (!found);
-- 
1.9.3



Re: [FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)

2014-09-02 Thread Tobias Burnus

Marek Polacek wrote:
 This patch fixes the last two spots where -Wlogical-not-parentheses
 warns.  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3
 if you want more info about the changes.

 Bootstrapped/regtested on x86_64-linux, ok for trunk?

Looks good to me. Thanks for the patch!

Tobias

 2014-09-02  Marek Polacek  pola...@redhat.com

   PR fortran/62270
   * interface.c (compare_parameter): Fix condition.
   * trans-expr.c (gfc_conv_procedure_call): Likewise.
 
   * gfortran.dg/pointer_intent_7.f90: Adjust dg-error.


[PATCH][ARM] Fix %N output modifier

2014-09-02 Thread Kyrill Tkachov

Hi all,

Following the transition to UAL I noticed that the %N output modifier 
doesn't really work. It calls fp_const_from_val to get the VFP encoding 
from a real value, but fp_const_from_val only supports the floating 
point zero constant and ICEs for all other values, making it useless for 
pretty much all purposes.

For example, the testcase in this patch ICEs.

With the conversion to UAL we no longer output the VFP encoded form of 
floating point constants but rather their natural representation. This 
patch makes sure that %N negates its operand properly and outputs it as 
a normal floating point number. It also handles operand lossage (if, for 
example, the user passed in a register instead of a constant).



Ok for upstream?

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* config/arm/arm.c (fp_const_from_val): Delete prototype and
definition.
(arm_print_operand): Don't use fp_const_from_val in the 'N' case.
Report unsupported operand.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* gcc.target/arm/n_output_modifier_1.c: New test.

commit 592aa40e87285c53229ccf544691611e9c78b578
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Wed Aug 13 14:08:02 2014 +0100

[ARM] Fix %N output modifier

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 5f7cbb1..a48ca4e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -100,7 +100,6 @@ static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update
 static void arm_print_operand (FILE *, rtx, int);
 static void arm_print_operand_address (FILE *, rtx);
 static bool arm_print_operand_punct_valid_p (unsigned char code);
-static const char *fp_const_from_val (REAL_VALUE_TYPE *);
 static arm_cc get_arm_condition_code (rtx);
 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
 static const char *output_multi_immediate (rtx *, const char *, const char *,
@@ -17534,17 +17533,6 @@ arm_reorg (void)
 
 /* Routines to output assembly language.  */
 
-/* Return string representation of passed in real value.  */
-static const char *
-fp_const_from_val (REAL_VALUE_TYPE *r)
-{
-  if (!fp_consts_inited)
-init_fp_table ();
-
-  gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
-  return 0;
-}
-
 /* OPERANDS[0] is the entire list of insns that constitute pop,
OPERANDS[1] is the base register, RETURN_PC is true iff return insn
is in the list, UPDATE is true iff the list contains explicit
@@ -21533,13 +21521,21 @@ arm_print_operand (FILE *stream, rtx x, int code)
 
 case 'N':
   {
-	REAL_VALUE_TYPE r;
-	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
-	r = real_value_negate (r);
-	fprintf (stream, %s, fp_const_from_val (r));
-  }
-  return;
+if (CONST_DOUBLE_P (x))
+  {
+char fpstr[20];
+REAL_VALUE_TYPE r;
+
+REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+r = real_value_negate (r);
+real_to_decimal (fpstr, r, sizeof (fpstr), 0, 1);
+fprintf (stream, %s, fpstr);
+  }
+else
+  output_operand_lossage (Unsupported operand for code '%c', code);
 
+return;
+  }
 /* An integer or symbol address without a preceding # sign.  */
 case 'c':
   switch (GET_CODE (x))
diff --git a/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c b/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c
new file mode 100644
index 000..e94914b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_vfp_ok } */
+/* { dg-final { scan-assembler-times -5.0e-1 1 } } */
+
+
+int
+foo (int a)
+{
+  int result = 0;
+   __asm__ (%0, %N1
+: =r(result)
+: Dt(0.5)
+: );
+  return result;
+}
+

[PATCH][ARM] Fix up vectoriser dumping and scanning in some tests

2014-09-02 Thread Kyrill Tkachov

Hi all,

I noticed for some reason that these tests don't properly dump the 
vectoriser pass before scanning, but it doesn't show up because the 
corresponding target predicate in the scan-tree-dump directive was never 
true on arm!
I think these tests were initially supposed to go somewhere in the 
midend but ended up being in gcc.target/arm/.
Since they're in gcc.target/arm, just add the ARMv8 NEON options and 
expect it to always vectorise.


These tests pass on arm just fine with these changes.

Ok for trunk?

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* gcc.target/arm/vect-rounding-btruncf.c: Dump vectoriser output.
Remove restriction on tree dump scan.
* gcc.target/arm/vect-rounding-ceilf.c: Likewise.
* gcc.target/arm/vect-rounding-floorf.c: Likewise.
* gcc.target/arm/vect-rounding-roundf.c: Likewise.

commit 0a1ce57744f00efd14b93b8f6decb6d5eff20c66
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Fri Aug 22 17:25:29 2014 +0100

[ARM] Fix testsuite flags for vect tests

diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
index ff033d4..5616837 100644
--- a/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
+++ b/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
-/* { dg-options -O2 -ffast-math -ftree-vectorize } */
+/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */
 /* { dg-add-options arm_v8_neon } */
 
 #define N 32
@@ -14,5 +14,5 @@ foo (float *output, float *input)
 output[i] = __builtin_truncf (input[i]);
 }
 
-/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_btruncf } } } */
+/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */
 /* { dg-final { cleanup-tree-dump vect } } */
diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
index b54f358..cb8f1d5 100644
--- a/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
+++ b/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
-/* { dg-options -O2 -ffast-math -ftree-vectorize } */
+/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */
 /* { dg-add-options arm_v8_neon } */
 
 #define N 32
@@ -14,5 +14,5 @@ foo (float *output, float *input)
 output[i] = __builtin_ceilf (input[i]);
 }
 
-/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_ceilf } } } */
+/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */
 /* { dg-final { cleanup-tree-dump vect } } */
diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
index 02e188d..bf68af7 100644
--- a/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
+++ b/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
-/* { dg-options -O2 -ffast-math -ftree-vectorize } */
+/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */
 /* { dg-add-options arm_v8_neon } */
 
 #define N 32
@@ -14,5 +14,5 @@ foo (float *output, float *input)
 output[i] = __builtin_floorf (input[i]);
 }
 
-/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_floorf } } } */
+/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */
 /* { dg-final { cleanup-tree-dump vect } } */
diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
index 85e2058..7c0a1b4 100644
--- a/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
+++ b/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_neon_ok } */
-/* { dg-options -O2 -ffast-math -ftree-vectorize } */
+/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */
 /* { dg-add-options arm_v8_neon } */
 
 #define N 32
@@ -14,5 +14,5 @@ foo (float *output, float *input)
 output[i] = __builtin_roundf (input[i]);
 }
 
-/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_roundf } } } */
+/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */
 /* { dg-final { cleanup-tree-dump vect } } */

Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions

2014-09-02 Thread Kyrill Tkachov

Hi Richard,

Sorry for the delay.

On 19/08/14 17:09, Richard Henderson wrote:

(define_special_predicate cc_register_zero
   (match_code reg)
{
   return (REGNO (op) == CC_REGNUM
(GET_MODE (op) == CCmode
   || GET_MODE (op) == CC_Zmode
   || GET_MODE (op) == CC_NZmode));
})

... and now that I read the backend more closely, I see _zero was a bad name.

But more importantly, I see no connection between the comparison used and the
CCmode being accepted.  And if we fix that, why are you restricting to just Z
and NZ?  What's wrong with e.g. CFPmode?


I'm not sure why I restricted the modes for csinc.


In the i386 backend, we check comparison+mode correspondence like

   (match_operator 4 ix86_carry_flag_operator
  [(match_operand 3 flags_reg_operand) (const_int 0)])

I think you'll want something similar.  In the case of CSINC, we can accept all
conditions, so let's start with the most general:

   (match_operator:GPI 2 aarch64_comparison_operation
 [(reg CC_REGNUM) (const_int 0)]

or even

   (match_operand:GPI 2 aarch64_comparison_operation )

with

(define_predicate aarch64_comparison_operation
 (match_code eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
 unordered,ordered,unlt,unle,unge,ungt)
{
   if (XEXP (op, 1) != const0_rtx)
 return false;
   rtx op0 = XEXP (op, 0);
   if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
 return false;
   return aarch64_get_condition_code (op) = 0;
})

where aarch64_get_condition_code is
   (1) exported
   (2) adjusted to return int not unsigned
   (3) adjusted to not abort, but return -1 for invalid combinations.

and the two existing users of aarch64_get_condition_code are adjusted to
gcc_assert that the return value is valid.


Implementing that seems to work fine. Bootstrap and testing were successful.
How's this version then?

Kyrill

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* config/aarch64/predicates.md (aarch64_comparison_operation):
New special predicate.
* config/aarch64/aarch64.md (*csinc2mode_insn): Use
aarch64_comparison_operation instead of matching an operator.
Update operand numbers.
(csinc3mode_insn): Likewise.
(*csinv3mode_insn): Likewise.
(*csneg3mode_insn): Likewise.
(ffsmode2): Update gen_csinc3mode_insn callsite.
* config/aarch64/aarch64.c (aarch64_get_condition_code): Export.
Return -1 instead of aborting on invalid condition codes.
(aarch64_print_operand): Update aarch64_get_condition_code callsites
to assert that the returned condition code is valid.



r~

commit a70dc696b967196d6662479a44682e3f423377ac
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Mon Aug 4 16:49:24 2014 +0100

[AArch64] Generalise condition code usage for csinc patterns

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b5335bf..d3be619 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -174,6 +174,7 @@ struct tune_params
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+int aarch64_get_condition_code (rtx);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
 bool aarch64_cannot_change_mode_class (enum machine_mode,
    enum machine_mode,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ba45d00..809d562 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3589,7 +3589,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
   return CCmode;
 }
 
-static unsigned
+int
 aarch64_get_condition_code (rtx x)
 {
   enum machine_mode mode = GET_MODE (XEXP (x, 0));
@@ -3616,7 +3616,7 @@ aarch64_get_condition_code (rtx x)
 	case UNLE: return AARCH64_LE;
 	case UNGT: return AARCH64_HI;
 	case UNGE: return AARCH64_PL;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
   break;
 
@@ -3633,7 +3633,7 @@ aarch64_get_condition_code (rtx x)
 	case GTU: return AARCH64_HI;
 	case LEU: return AARCH64_LS;
 	case LTU: return AARCH64_CC;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
   break;
 
@@ -3652,7 +3652,7 @@ aarch64_get_condition_code (rtx x)
 	case GTU: return AARCH64_CC;
 	case LEU: return AARCH64_CS;
 	case LTU: return AARCH64_HI;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
   break;
 
@@ -3663,7 +3663,7 @@ aarch64_get_condition_code (rtx x)
 	case EQ: return AARCH64_EQ;
 	case GE: return AARCH64_PL;
 	case LT: return AARCH64_MI;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
   break;
 
@@ -3672,12 +3672,12 @@ aarch64_get_condition_code (rtx x)
 	{
 	case NE: return AARCH64_NE;
 	case EQ: return AARCH64_EQ;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
   break;
 
 default:
-  gcc_unreachable ();
+  return -1;
   break;
 }
 }
@@ -3795,39 +3795,48 @@ aarch64_print_operand (FILE *f, rtx x, char code)
   break;
 
 case 'm':
-  /* Print a condition (eq, ne, etc).  

[PATCH][ARM] Disable store_minmaxsi pattern for arm_restrict_it

2014-09-02 Thread Kyrill Tkachov

Hi all,

The store_minmaxsi produces a cmp + ite + 2 conditional stores and is 
thus inappropriate when the ARMv8-A IT block rules are in place. 
Previously we had disabled it for speed optimisations, but it should be 
disabled completely when -mrestrict-it is in effect.


Ok for trunk and 4.9?

Tested arm-none-eabi.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* config/arm/arm.md (*store_minmaxsi): Disable for arm_restrict_it.

commit 1a721e0ea3e204adff36bdfd803a2071354421b2
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Mon Sep 1 12:11:10 2014 +0100

[ARM] Disable store_minmaxsi for arm_restrict_it

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 766b646..cb6cc81 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3449,7 +3449,7 @@ (define_insn *store_minmaxsi
 	 [(match_operand:SI 1 s_register_operand r)
 	  (match_operand:SI 2 s_register_operand r)]))
(clobber (reg:CC CC_REGNUM))]
-  TARGET_32BIT  optimize_function_for_size_p (cfun)
+  TARGET_32BIT  optimize_function_for_size_p (cfun)  !arm_restrict_it
   *
   operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
 operands[1], operands[2]);

[PATCH][ARM][2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions

2014-09-02 Thread Kyrill Tkachov

Hi all,

In continuation of patch [1/2]...
We can use the vector forms of the vcvt{a,p,m} instructions to vectorise 
the l{round, ceil, floor}f functions.
Builtins are added and the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 
implementation is updated to wire up the vectorised forms of these 
functions to the midend.


Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR target/62275
* config/arm/neon.md
 (neon_vcvtNEON_VCVT:nvrint_variantsu_optabVCVTF:mode
v_cmp_result): New pattern.
* config/arm/iterators.md (NEON_VCVT): New int iterator.
* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
* config/arm/arm.c (arm_builtin_vectorized_function): Handle
BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR target/62275
* gcc.target/arm/vect-lceilf_1.c: New test.
* gcc.target/arm/vect-lfloorf_1.c: Likewise.
* gcc.target/arm/vect-lroundf_1.c: Likewise.

commit 3854d95bace665f6d9d8c007702b6d26f6fe07c2
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Fri Aug 22 17:23:20 2014 +0100

[ARM] Vectorise lroundf, lfloorf, lceilf on ARMv8-A

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index ff66c60..c3b8518 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -29945,6 +29945,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 {
   enum machine_mode in_mode, out_mode;
   int in_n, out_n;
+  bool out_unsigned_p = TYPE_UNSIGNED (type_out);
 
   if (TREE_CODE (type_out) != VECTOR_TYPE
   || TREE_CODE (type_in) != VECTOR_TYPE)
@@ -29990,6 +29991,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 return ARM_FIND_VRINT_VARIANT (vrintz);
   case BUILT_IN_ROUNDF:
 return ARM_FIND_VRINT_VARIANT (vrinta);
+#undef ARM_CHECK_BUILTIN_MODE_1
+#define ARM_CHECK_BUILTIN_MODE_1(C) \
+  (out_mode == SImode  out_n == C \
+in_mode == SFmode  in_n == C)
+
+#define ARM_FIND_VCVT_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
+ : NULL_TREE))
+
+#define ARM_FIND_VCVTU_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
+ : NULL_TREE))
+  case BUILT_IN_LROUNDF:
+return out_unsigned_p
+ ? ARM_FIND_VCVTU_VARIANT (vcvta)
+ : ARM_FIND_VCVT_VARIANT (vcvta);
+  case BUILT_IN_LCEILF:
+return out_unsigned_p
+ ? ARM_FIND_VCVTU_VARIANT (vcvtp)
+ : ARM_FIND_VCVT_VARIANT (vcvtp);
+  case BUILT_IN_LFLOORF:
+return out_unsigned_p
+ ? ARM_FIND_VCVTU_VARIANT (vcvtm)
+ : ARM_FIND_VCVT_VARIANT (vcvtm);
 #undef ARM_CHECK_BUILTIN_MODE
 #define ARM_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == N##Imode  out_n == C \
@@ -30020,9 +30051,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 }
   return NULL_TREE;
 }
+#undef ARM_FIND_VCVT_VARIANT
+#undef ARM_FIND_VCVTU_VARIANT
 #undef ARM_CHECK_BUILTIN_MODE
 #undef ARM_FIND_VRINT_VARIANT
 
+
 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
 static HOST_WIDE_INT
 arm_vector_alignment (const_tree type)
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index f4531f3..efe5bda 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf),
 VAR2 (RINT, vrintm, v2sf, v4sf),
 VAR2 (RINT, vrintz, v2sf, v4sf),
 VAR2 (RINT, vrintx, v2sf, v4sf),
+VAR1 (RINT, vcvtav2sf, v2si),
+VAR1 (RINT, vcvtav4sf, v4si),
+VAR1 (RINT, vcvtauv2sf, v2si),
+VAR1 (RINT, vcvtauv4sf, v4si),
+VAR1 (RINT, vcvtpv2sf, v2si),
+VAR1 (RINT, vcvtpv4sf, v4si),
+VAR1 (RINT, vcvtpuv2sf, v2si),
+VAR1 (RINT, vcvtpuv4sf, v4si),
+VAR1 (RINT, vcvtmv2sf, v2si),
+VAR1 (RINT, vcvtmv4sf, v4si),
+VAR1 (RINT, vcvtmuv2sf, v2si),
+VAR1 (RINT, vcvtmuv4sf, v4si),
 VAR1 (VTBL, vtbl1, v8qi),
 VAR1 (VTBL, vtbl2, v8qi),
 VAR1 (VTBL, vtbl3, v8qi),
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f7e0e14..021372a 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -223,6 +223,8 @@ (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
 (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
   UNSPEC_NVRINTX UNSPEC_NVRINTA 

[PATCH][ARM][1/2] Implement lceil, lfloor, lround optabs with new ARMv8-A instructions

2014-09-02 Thread Kyrill Tkachov

Hi all,

This patch implements the {lceil, lfloor, lround}si{sf, df}2 optabs in a 
similar way to fcvt in aarch64. We use the new ARMv8 FP convert with 
rounding instructions vcvt{a,p,m} for that.


Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR target/62275
* config/arm/iterators.md (FIXUORS): New code iterator.
(VCVT): New int iterator.
(su_optab): New code attribute.
(su): Likewise.
* config/arm/vfp.md (lvrint_patternsu_optabmodesi2): New pattern.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

PR target/62275
* gcc.target/arm/lceil-vcvt_1.c: New test.
* gcc.target/arm/lfloor-vcvt_1.c: Likewise.
* gcc.target/arm/lround-vcvt_1.c: Likewise.

commit ba3ec05be54d74ee53d287dfa4eb7b5508292e59
Author: Kyrylo Tkachov kyrylo.tkac...@arm.com
Date:   Thu Aug 21 12:28:00 2014 +0100

[ARM] Implement ARMv8-A vcvt* optabs

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 6fe6eef..f7e0e14 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -194,6 +194,9 @@ (define_code_iterator SE [sign_extend zero_extend])
 ;; Right shifts
 (define_code_iterator rshifts [ashiftrt lshiftrt])
 
+;; Iterator for integer conversions
+(define_code_iterator FIXUORS [fix unsigned_fix])
+
 ;; Binary operators whose second operand can be shifted.
 (define_code_iterator shiftable_ops [plus minus ior xor and])
 
@@ -215,6 +218,8 @@ (define_code_attr arith_shift_insn
 (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM
 UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA])
 
+(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
+
 (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
   UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
 
@@ -519,6 +524,13 @@ (define_code_attr optab [(ltu ltu) (geu geu)])
 ;; Assembler mnemonics for signedness of widening operations.
 (define_code_attr US [(sign_extend s) (zero_extend u)])
 
+;; Signedness suffix for float-fixed conversions.  Empty for signed
+;; conversion.
+(define_code_attr su_optab [(fix ) (unsigned_fix u)])
+
+;; Sign prefix to use in instruction type suffixes, i.e. s32, u32.
+(define_code_attr su [(fix s) (unsigned_fix u)])
+
 ;; Right shifts
 (define_code_attr shift [(ashiftrt ashr) (lshiftrt lshr)])
 (define_code_attr shifttype [(ashiftrt signed) (lshiftrt unsigned)])
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 90e001c..a203449 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -1306,6 +1306,18 @@ (define_insn vrint_patternSDF:mode2
(set_attr conds vrint_conds)]
 )
 
+;; Implements the lround, lfloor and lceil optabs.
+(define_insn lvrint_patternsu_optabmodesi2
+  [(set (match_operand:SI 0 register_operand =t)
+(FIXUORS:SI (unspec:SDF
+[(match_operand:SDF 1
+   register_operand F_constraint)] VCVT)))]
+  TARGET_HARD_FLOAT  TARGET_FPU_ARMV8 vfp_double_cond
+  vcvtvrint_variant%?.su32.V_if_elem\\t%0, %V_reg1
+  [(set_attr predicable no)
+   (set_attr type f_cvtf2i)]
+)
+
 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
 ;; operand will be returned when both operands are zero (i.e. they may not
diff --git a/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
new file mode 100644
index 000..bbe4271
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_vfp_ok } */
+/* { dg-options -O2 -march=armv8-a } */
+/* { dg-add-options arm_v8_vfp } */
+
+int
+foofloat (float x)
+{
+  return __builtin_lceilf (x);
+}
+
+/* { dg-final { scan-assembler-times vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+ 1 } } */
+
+
+int
+foodouble (double x)
+{
+  return __builtin_lceil (x);
+}
+
+/* { dg-final { scan-assembler-times vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+ 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
new file mode 100644
index 000..88671d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_vfp_ok } */
+/* { dg-options -O2 -march=armv8-a } */
+/* { dg-add-options arm_v8_vfp } */
+
+int
+foofloat (float x)
+{
+  return __builtin_lfloorf (x);
+}
+
+/* { dg-final { scan-assembler-times vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+ 1 } } */
+
+
+int
+foodouble (double x)
+{
+  return __builtin_lfloor (x);
+}
+
+/* { dg-final { scan-assembler-times vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+ 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c
new file mode 100644
index 000..8b1f6a7
--- /dev/null
+++ 

Re: [C++ Patch] PR 58102 aka DR 1405

2014-09-02 Thread Jason Merrill

On 09/02/2014 11:07 AM, Paolo Carlini wrote:

Anyway, what about the below? Certainly works for the tests which we
have got.


Hmm.  This is definitely an improvement, as it allows a subset of

a non-volatile glvalue of literal type that refers to a non-volatile 
object whose lifetime began within the evaluation of e


But it doesn't cover all of that, and in any case we shouldn't need to 
explicitly handle that just for types with mutable subobjects.


I think perhaps it would be better to remove that hunk as in your 
initial patch and replace it with a check in constant_value_1 and an 
explanation in non_const_var_error.


Jason



Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions

2014-09-02 Thread Richard Henderson
On 09/02/2014 08:34 AM, Kyrill Tkachov wrote:
 2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com
 
 * config/aarch64/predicates.md (aarch64_comparison_operation):
 New special predicate.
 * config/aarch64/aarch64.md (*csinc2mode_insn): Use
 aarch64_comparison_operation instead of matching an operator.
 Update operand numbers.
 (csinc3mode_insn): Likewise.
 (*csinv3mode_insn): Likewise.
 (*csneg3mode_insn): Likewise.
 (ffsmode2): Update gen_csinc3mode_insn callsite.
 * config/aarch64/aarch64.c (aarch64_get_condition_code): Export.
 Return -1 instead of aborting on invalid condition codes.
 (aarch64_print_operand): Update aarch64_get_condition_code callsites
 to assert that the returned condition code is valid.

Looks good to me.
Note that you missed the ChangeLog entry for aarch64-protos.h.


r~


Re: [RFA:] testsuite: robustify g++.old-deja/g++.eh/badalloc1.C for 64-bit systems

2014-09-02 Thread Mike Stump
On Sep 2, 2014, at 3:28 AM, Hans-Peter Nilsson hans-peter.nils...@axis.com 
wrote:
 In a native x86_64-linux toolchain in which
 eh-table-registration is done explicitly (i.e. dl_iterate_phdr
 and PT_GNU_EH_FRAME is *not* assumed, as that eliminates the
 issue), the memory overhead for exception-initialization goes
 beyond the 32768 bytes assumed in badalloc1.C and the test fails
 for reasons not intended by the test.

 Ok to commit?

Ok.

If someone with deeper insight into the details wants to chime in, or robustify 
it some more…  the test strikes me as unfortunately brittle.

I’d be tempted to make it the maximal size on any system (subject to smaller 
systems limiting it, cause they are small).

Re: [PATCH][ARM][1/2] Implement lceil, lfloor, lround optabs with new ARMv8-A instructions

2014-09-02 Thread Ramana Radhakrishnan



On 02/09/14 16:34, Kyrill Tkachov wrote:

Hi all,

This patch implements the {lceil, lfloor, lround}si{sf, df}2 optabs in a
similar way to fcvt in aarch64. We use the new ARMv8 FP convert with
rounding instructions vcvt{a,p,m} for that.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?


Ok .

Ramana



Thanks,
Kyrill

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

  PR target/62275
  * config/arm/iterators.md (FIXUORS): New code iterator.
  (VCVT): New int iterator.
  (su_optab): New code attribute.
  (su): Likewise.
  * config/arm/vfp.md (lvrint_patternsu_optabmodesi2): New pattern.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

  PR target/62275
  * gcc.target/arm/lceil-vcvt_1.c: New test.
  * gcc.target/arm/lfloor-vcvt_1.c: Likewise.
  * gcc.target/arm/lround-vcvt_1.c: Likewise.



Re: [PATCH][ARM][2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions

2014-09-02 Thread Ramana Radhakrishnan



On 02/09/14 16:34, Kyrill Tkachov wrote:

Hi all,

In continuation of patch [1/2]...
We can use the vector forms of the vcvt{a,p,m} instructions to vectorise
the l{round, ceil, floor}f functions.
Builtins are added and the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
implementation is updated to wire up the vectorised forms of these
functions to the midend.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?


Ok - thanks.

Ramana


Thanks,
Kyrill

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

  PR target/62275
  * config/arm/neon.md
   (neon_vcvtNEON_VCVT:nvrint_variantsu_optabVCVTF:mode
  v_cmp_result): New pattern.
  * config/arm/iterators.md (NEON_VCVT): New int iterator.
  * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
  vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
  vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
  * config/arm/arm.c (arm_builtin_vectorized_function): Handle
  BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

  PR target/62275
  * gcc.target/arm/vect-lceilf_1.c: New test.
  * gcc.target/arm/vect-lfloorf_1.c: Likewise.
  * gcc.target/arm/vect-lroundf_1.c: Likewise.



Re: [PATCH] aarch64: Enable Neon search_line_fast

2014-09-02 Thread Ramana Radhakrishnan



On 02/09/14 16:28, Richard Henderson wrote:

Is it intentional or not that AArch64 does not define __ARM_NEON__?


Yes I remember so, __ARM_NEON__ is not ACLE compatible so we haven't 
defined it for AArch64 - on AArch32 and AArch64 we now have __ARM_NEON 
defined so that's the macro to be used.




Otherwise, here's a better way to fold the test bits.  AArch64 of
course does not have dN+1 overlap the high part of the qM register,
like AArch32, so the current

   l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));

implies extra register moves.  But on the good side, the armv8 ADDV
instruction allows two instructions to be removed from this fast path.


Cool.



When built for 32-bit, the new form results in the same instruction
count; we simply keep using q registers instead of d registers
for two more insns.  Given that there are currently ifdefs involved,
it would certainly be possible to keep the 32-bit path unchanged, if
that's thought to be valuable.


The ADDV instruction isn't available on the AArch32 side IIRC. Given 
that situation there is no intrinsic for ADDV on the AArch32 side which 
is why this doesn't exist in the AArch32 version of arm_neon.h :(


I'll need to take a look at the new code generated for AArch32 and will 
probably be able to get back tomorrow as I'll disappear shortly.




I did wonder if the armv8 stuff was supposed to be included in the
AArch32 arm_neon.h?  Is it just an oversight that it's missing?


The ARMv8 stuff is included for arm_neon.h - I believe we've implemented 
everything that's ARMv8 specific in arm_neon.h for AArch32 . Anything 
missing would be an oversight.



regards
Ramana






r~


* lex.c (search_line_fast) [__ARM_NEON]: Use __FOO not __FOO__
to detect neon support.  Fold the comparison using ADDV when
available.


diff --git a/libcpp/lex.c b/libcpp/lex.c
index 5366dad..6d1823e 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -638,7 +638,7 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
}
  }

-#elif defined (__ARM_NEON__)
+#elif defined (__ARM_NEON)
  #include arm_neon.h

  static const uchar *
@@ -649,6 +649,7 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
const uint8x16_t repl_qm = vdupq_n_u8 ('?');
const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
+  const int16x8_t shift = { 0, 0, 0, 0, 8, 8, 8, 8 };

unsigned int misalign, found, mask;
const uint8_t *p;
@@ -670,10 +671,8 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)

do
  {
-  uint8x8_t l;
-  uint16x4_t m;
-  uint32x2_t n;
uint8x16_t t, u, v, w;
+  uint16x8_t l;

p += 16;
data = vld1q_u8 (p);
@@ -685,12 +684,24 @@ search_line_fast (const uchar *s, const uchar *end 
ATTRIBUTE_UNUSED)
v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
t = vandq_u8 (vorrq_u8 (v, w), xmask);
-  l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
-  m = vpaddl_u8 (l);
-  n = vpaddl_u16 (m);
-
-  found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
- vshr_n_u64 ((uint64x1_t) n, 24)), 0);
+
+  l = vpaddlq_u8 (t);
+  l = vshlq_u16 (l, shift);
+
+  /* ??? Ideally, this would be if (__ARM_ARCH >= 8) since the ADDV insn
+reduces the instruction count by two.  But vaddvq is not present in
+the arm32 arm_neon.h, nor does AArch64 define __ARM_ARCH.  */
+#ifdef __aarch64__
+  found = vaddvq_u16 (l);
+#else
+  {
+   uint32x4_t m = vpaddlq_u16 (l);
+   uint64x2_t n = vpaddlq_u32 (m);
+   uint64x1_t o = vget_low_u64 (n) + vget_high_u64 (n);
+   found = vget_lane_u32 ((uint32x2_t)o, 0);
+  }
+#endif
+
found &= mask;
  }
while (!found);



[PATCH] Enable -Wlogical-not-parentheses by -Wall

2014-09-02 Thread Marek Polacek
Now that PR61271 and PR62270 have been fixed, we can enable
-Wlogical-not-parentheses by -Wall.  I think this warning proved
useful.

Bootstrapped/regtested on x86_64-linux and ppc64-linux, ok for trunk?

2014-08-26  Marek Polacek  pola...@redhat.com

* doc/invoke.texi: Document that -Wlogical-not-parentheses is enabled
by -Wall.
c-family/
* c.opt (Wlogical-not-parentheses): Enable by -Wall.

diff --git gcc/c-family/c.opt gcc/c-family/c.opt
index 210a099..643f256 100644
--- gcc/c-family/c.opt
+++ gcc/c-family/c.opt
@@ -519,7 +519,7 @@ C ObjC C++ ObjC++ Var(warn_logical_op) Init(0) Warning
 Warn when a logical operator is suspiciously always evaluating to true or false
 
 Wlogical-not-parentheses
-C ObjC C++ ObjC++ Var(warn_logical_not_paren) Warning
+C ObjC C++ ObjC++ Var(warn_logical_not_paren) Warning LangEnabledBy(C ObjC C++ 
ObjC++,Wall)
 Warn when logical not is used on the left hand side operand of a comparison
 
 Wlong-long
diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
index d15d4a9..9864708 100644
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -4800,6 +4800,8 @@ parentheses:
 if ((!a)  1) @{ @dots{} @}
 @end smallexample
 
+This warning is enabled by @option{-Wall}.
+
 @item -Waggregate-return
 @opindex Waggregate-return
 @opindex Wno-aggregate-return

Marek


Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions

2014-09-02 Thread Kyrill Tkachov


On 02/09/14 16:47, Richard Henderson wrote:

On 09/02/2014 08:34 AM, Kyrill Tkachov wrote:

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

 * config/aarch64/predicates.md (aarch64_comparison_operation):
 New special predicate.
 * config/aarch64/aarch64.md (*csinc2mode_insn): Use
 aarch64_comparison_operation instead of matching an operator.
 Update operand numbers.
 (csinc3mode_insn): Likewise.
 (*csinv3mode_insn): Likewise.
 (*csneg3mode_insn): Likewise.
 (ffsmode2): Update gen_csinc3mode_insn callsite.
 * config/aarch64/aarch64.c (aarch64_get_condition_code): Export.
 Return -1 instead of aborting on invalid condition codes.
 (aarch64_print_operand): Update aarch64_get_condition_code callsites
 to assert that the returned condition code is valid.

Looks good to me.
Note that you missed the ChangeLog entry for aarch64-protos.h.


Thanks, it seems I had written the export part in the aarch64.c entry. 
Committed as r214824

with ChangeLog:

2014-09-02  Kyrylo Tkachov  kyrylo.tkac...@arm.com

* config/aarch64/predicates.md (aarch64_comparison_operation):
New special predicate.
* config/aarch64/aarch64.md (*csinc2mode_insn): Use
aarch64_comparison_operation instead of matching an operator.
Update operand numbers.
(csinc3mode_insn): Likewise.
(*csinv3mode_insn): Likewise.
(*csneg3mode_insn): Likewise.
(ffsmode2): Update gen_csinc3mode_insn callsite.
* config/aarch64/aarch64.c (aarch64_get_condition_code):
Return -1 instead of aborting on invalid condition codes.
(aarch64_print_operand): Update aarch64_get_condition_code callsites
to assert that the returned condition code is valid.
* config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export.


Kyrill




r~






Re: [PING][PATCH] Fix environment variables restoring in GCC testsuite.

2014-09-02 Thread Mike Stump

 When I ran Asan test on Asan-bootstrapped GCC, some of them fail with
 memory leaks into GCC, even if Lsan is disabled. This caused by slightly
 wrong logic in saving/restoring env variables functionality in
 gcc-dg.exp (some tests override ASAN_OPTIONS and this env variable isn't
 restored correctly).

 Ok to commit?

Ok.  I’ll note there are 22 other places like this.

Re: [PATCH] aarch64: Enable Neon search_line_fast

2014-09-02 Thread Richard Henderson
On 09/02/2014 08:51 AM, Ramana Radhakrishnan wrote:
 The ADDV instruction isn't available on the AArch32 side IIRC. Given that
 situation there is no intrinsic for ADDV on the AArch32 side which is why this
 doesn't exist in the AArch32 version of arm_neon.h :(

Whoops, yes indeed.  I clearly mis-read the spec.


r~


Re: [PATCH AArch64 2/2] Replace temporary inline assembler for vget_high

2014-09-02 Thread Marcus Shawcroft
On 12 August 2014 11:12, Alan Lawrence alan.lawre...@arm.com wrote:
 This patch replaces the current inline assembler for the vget_high
 intrinsics in arm_neon.h with a sequence of other calls, in a similar
 fashion to vget_low. Unlike the assembler, these are all transparent to the
 front-end, so should enable better optimization through the mid-end.

 Tested check-gcc and check-g++ and aarch64-none-elf and aarch64_be-none-elf
 (including new tests in previous patch!).

I think we are still waiting on ChangeLogs for this and the related patch?

/Marcus


Re: [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.

2014-09-02 Thread Marcus Shawcroft
On 19 August 2014 18:02, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-simd.md (aarch64_rbitmode): New pattern.
 * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.

 * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8,
 vrbitq_u8):
 Replace temporary asm with call to builtin.
 (vrbit_p8, vrbitq_p8): New functions.

 gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/simd/vrbit_1.c: New test.


OK
/Marcus


Re: [PATCH][AArch64] Remove varargs from aarch64_simd_expand_args

2014-09-02 Thread Marcus Shawcroft
On 20 August 2014 10:20, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args):
 Replace
 varargs with pointer parameter.
 (aarch64_simd_expand_builtin): pass pointer into previous.

OK /Marcus


Re: [PATCH][AArch64] One-liner: fix type of an add in SIMD registers

2014-09-02 Thread Marcus Shawcroft
On 20 August 2014 10:25, Alan Lawrence alan.lawre...@arm.com wrote:
 The SIMD-register variant is miscategorized as alu_reg despite not using
 any ALU registers, and should be neon_add for e.g. scheduling.

 Tested with check-gcc and check-g++ on aarch64-none-elf and
 aarch64_be-none-elf.

 gcc/ChangeLog:

 * config/aarch64/aarch64.md (adddi3_aarch64): set type to neon_add.

OK and back port please.

/Marcus


Re: [PATCH][AArch64] Tidy: remove unused qualifier_const_pointer

2014-09-02 Thread Marcus Shawcroft
On 20 August 2014 10:31, Alan Lawrence alan.lawre...@arm.com wrote:
 The only reference is in a comment.

 gcc/ChangeLog:

 * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
 Remove qualifier_const_pointer, update comment.

OK /M


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andi Kleen
 Or we simply should make -finline work at -O0 (I suppose it might already
 work?) and use it.

Yes that's probably better. There are more hot inlines in the stage 1 profile
(like wi::storage_ref or vec::length)
I suspect with the ongoing C++'ification that will get worse.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only


Re: [C PATCH] Backport a fix for PR62294 to 4.9

2014-09-02 Thread Joseph S. Myers
On Tue, 2 Sep 2014, Marek Polacek wrote:

 PR62294 reports that 4.9 does not emit an incompatible pointer type
 warning in certain scenario.  I unknowingly broke this in r207335, and
 then fixed it in r210980, which is a follow-up to the former.  But 4.9
 doesn't have the latter.  This patch is basically a backport of r210980,
 only without the traditional conversion stuff.
 
 Bootstrapped/regtested on x86_64-linux, ok for 4.9?

OK with a testcase specifically for the regression case added on trunk and 
4.9 if there isn't one already.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [Patch, Fortran] PRs 61881/61888 - Fix issues with SIZEOF, CLASS(*) and assumed-rank

2014-09-02 Thread Thomas Schwinge
Hi Tobias!

On Sat, 26 Jul 2014 01:47:02 +0200, Tobias Burnus bur...@net-b.de wrote:
 2014-07-26  Tobias Burnus  bur...@net-b.de
 
   * check.c (gfc_check_sizeof): Permit for assumed type if and
   only if it has an array descriptor.
   * intrinsic.c (do_ts29113_check): Permit SIZEOF.
   (add_functions): SIZEOF is an Inquiry function.
   * intrinsic.texi (SIZEOF): Add note that only contiguous
   arrays are permitted.
   * trans-expr.c (gfc_conv_intrinsic_to_class): Handle assumed
   rank.
   * trans-intrinsic.c (gfc_conv_intrinsic_sizeof): Handle
   assumed type + array descriptor, CLASS and assumed rank.
   (gfc_conv_intrinsic_storage_size): Handle class arrays.
 
 2014-07-26  Tobias Burnus  bur...@net-b.de
 
   * gfortran.dg/sizeof_2.f90: Change dg-error.
   * gfortran.dg/sizeof_4.f90: New.
   * gfortran.dg/storage_size_1.f08: Correct expected
   value.

I noticed that the sizeof_4.f90 test case has not been checked in,
probably just forgot to svn add the file?

Searching for it in my emails, I also noticed that a year ago a similar
patch has been posted in
http://news.gmane.org/find-root.php?message_id=%3CCAKwh3qi633jU-ojPKqRa_16DKWhXn9L2N0Wr4trAG9p1dJ-sXg%40mail.gmail.com%3E,
but that is now probably obsolete.


Grüße,
 Thomas


pgpidlSQNRkyl.pgp
Description: PGP signature


Re: [gomp4] Add tables generation

2014-09-02 Thread Ilya Verbin
Hi Bernd,

This patch allows to compile binaries with offloading without passing -flto 
option, and
w/o performing link-time optimizations of the host code.

How it works:
1.  If there is at least one function or global variable to offload, gcc sets 
flag_generate_lto.
This enables writing the bytecode produced by ipa_write_summaries into
.gnu.target_lto_* sections (.gnu.lto_* sections are not created).
Also this flag emits LTO marker (__gnu_lto_v1).
2.  This step is not changed: collect2 scans object files for the LTO marker 
and fills the list
of LTO objects.  If the list is not empty, it runs lto-wrapper to perform 
link-time recompilation.
3.  lto-wrapper compiles images for targets.  And if -flto option is absent
(lto_mode == LTO_MODE_NONE), then it just returns the list of input objects 
without recompilation.

One known issue -- the final binary contains temporary .gnu.target_lto_* 
sections.
This can be solved by adding the following linker script to the list of input 
files:
SECTIONS { /DISCARD/ : { *(.gnu.target_lto_*) } }
But I'm not sure what is the best way to do this automatically.

Bootstrap and make check passed, tests with '#pragma omp target' without -flto 
passed.
What do you think?

Thanks,
  -- Ilya


---
 gcc/cgraphunit.c  | 39 +++
 gcc/lto-wrapper.c | 68 +--
 gcc/omp-low.c |  6 +
 gcc/passes.c  |  2 +-
 4 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index f0c9f5c..32b35f3 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2040,13 +2040,26 @@ output_in_order (void)
   free (nodes);
 }
 
-/* Collect all global variables with omp declare target attribute into
-   OFFLOAD_VARS.  It will be streamed out in ipa_write_summaries.  */
+/* Check whether there is at least one function or global variable to offload.
+   Also collect all such global variables into OFFLOAD_VARS, the functions were
+   already collected in omp-low.c.  They will be streamed out in
+   ipa_write_summaries.  */
 
-static void
-init_offload_var_table (void)
+static bool
+initialize_offload (void)
 {
+  bool have_offload = false;
+  struct cgraph_node *node;
   struct varpool_node *vnode;
+
+  FOR_EACH_DEFINED_FUNCTION (node)
+if (lookup_attribute (omp declare target, DECL_ATTRIBUTES (node-decl)))
+  {
+   have_offload = true;
+   break;
+  }
+
   FOR_EACH_DEFINED_VARIABLE (vnode)
 {
   if (!lookup_attribute (omp declare target,
@@ -2054,13 +2067,17 @@ init_offload_var_table (void)
  || TREE_CODE (vnode-decl) != VAR_DECL
  || DECL_SIZE (vnode-decl) == 0)
continue;
+  have_offload = true;
   vec_safe_push (offload_vars, vnode-decl);
 }
+
+  return have_offload;
 }
 
 static void
 ipa_passes (void)
 {
+  bool have_offload = false;
   gcc::pass_manager *passes = g-get_passes ();
 
   set_cfun (NULL);
@@ -2068,6 +2085,14 @@ ipa_passes (void)
   gimple_register_cfg_hooks ();
   bitmap_obstack_initialize (NULL);
 
+  if (!in_lto_p  (flag_openacc || flag_openmp))
+{
+  have_offload = initialize_offload ();
+  /* OpenACC / OpenMP offloading requires LTO infrastructure.  */
+  if (have_offload)
+   flag_generate_lto = 1;
+}
+
   invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
 
   if (!in_lto_p)
@@ -2108,11 +2133,7 @@ ipa_passes (void)
 
   if (!in_lto_p)
 {
-  init_offload_var_table ();
-
-  if ((flag_openacc || flag_openmp)
-  !(vec_safe_is_empty (offload_funcs)
-   vec_safe_is_empty (offload_vars)))
+  if (have_offload)
{
  section_name_prefix = OMP_SECTION_NAME_PREFIX;
  ipa_write_summaries (true);
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index 80d10f3..e9245f1 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -668,6 +668,11 @@ run_gcc (unsigned argc, char *argv[])
  close (fd);
  continue;
}
+  /* We may choose not to write out this .opts section in the future.  In
+that case we'll have to use something else to look for.  */
+  if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX . opts,
+ offset, length, errmsg, err))
+   have_offload = true;
   if (!simple_object_find_section (sobj, LTO_SECTION_NAME_PREFIX . 
opts,
   offset, length, errmsg, err))
{
@@ -675,11 +680,6 @@ run_gcc (unsigned argc, char *argv[])
  close (fd);
  continue;
}
-  /* We may choose not to write out this .opts section in the future.  In
-that case we'll have to use something else to look for.  */
-  if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX . opts,
- offset, length, errmsg, err))
-   have_offload = true;
   lseek (fd, file_offset + offset, SEEK_SET);
   data = (char *)xmalloc (length);
  

[Patch, Fortran] Component declarations overwrite types of Cray Pointee variables

2014-09-02 Thread Fritz Reese
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62174

The typespecs for Cray pointees are overwritten by the typespecs of
components with the same name which are declared later.

This problem was introduced with Cray pointer support in 4.1.0 and is
confirmed up through trunk (5.0).

Here is a proposed patch from 4.8.3 (test case comments/ChangeLog
descriptions are updated from the submission on bugzilla). The test
case demonstrates the problem.

FYI, I am currently working with my employer so any future changes I
have can comply with GNU's legal requirements. Also my mail client
replaces tabs with spaces so I'm sorry for any whitespace issues.

2014-09-02  Fritz Reese  reese-fr...@zai.com

PR fortran/62174
* decl.c (variable_decl): Don't overwrite typespecs of Cray pointees
 when matching a component declaration.

diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index 4048ac9..7b3c59a 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -1904,8 +1904,9 @@ variable_decl (int elem)
 }

   /*  If this symbol has already shown up in a Cray Pointer declaration,
+  and this is not a component declaration,
   then we want to set the type  bail out.  */
-  if (gfc_option.flag_cray_pointer)
+  if (gfc_option.flag_cray_pointer  gfc_current_state () != COMP_DERIVED)
 {
   gfc_find_symbol (name, gfc_current_ns, 1, sym);
   if (sym != NULL  sym-attr.cray_pointee)


2014-09-02  Fritz Reese  reese-fr...@zai.com

PR fortran/62174
* gcc/testsuite/gfortran.dg/cray_pointers_11.f90: New.

diff --git a/gcc/testsuite/gfortran.dg/cray_pointers_11.f90
b/gcc/testsuite/gfortran.dg/cray_pointers_11.f90
new file mode 100644
index 000..038e4dc
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/cray_pointers_11.f90
@@ -0,0 +1,22 @@
+! { dg-do compile }
+! { dg-options -fcray-pointer }
+!
+! PR fortran/62174
+! Component declarations within derived types would overwrite the typespec of
+! variables with the same name who were Cray pointees.
+implicit none
+
+type t1
+  integer i
+end type t1
+type(t1) x
+
+pointer (x_ptr, x)
+
+type t2
+  real x ! should not overwrite x's type
+end type t2
+
+x%i = 0 ! should see no error here
+
+end

---
Fritz Reese


Re: [PATCH] Force rtl templates to be inlined

2014-09-02 Thread Andi Kleen
 I suspect the bulk of them currently are coming from the safe_as_a
 rtx_insn * calls within NEXT_INSN and PREV_INSN; do you happen to have
 information handy on that?

Yes that's right:

-   1.03%  lto1[.] bool 
is_a_helperrtx_insn*::testrtx_def(rtx_def*) 
▒
   - bool is_a_helperrtx_insn*::testrtx_def(rtx_def*)   
▒
  - 92.20% bool is_artx_insn*, rtx_def(rtx_def*)  
▒
 - 98.53% rtx_insn* safe_as_artx_insn*, rtx_def(rtx_def*) 
▒
- 73.28% NEXT_INSN(rtx_insn const*) 
▒


RFA: Document first operand to RTX_AUTOINC

2014-09-02 Thread Richard Sandiford
As Jeff suggested here:
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00390.html
this patch documents that the first operand to an RTX_AUTOINC
is the automodified register.

Tested on x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
* doc/rtl.texi (RTX_AUTOINC): Document that the first operand is
the automodified register.

Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2014-05-06 18:38:47.982200623 +0100
+++ gcc/doc/rtl.texi2014-08-30 16:04:50.870456416 +0100
@@ -193,7 +193,8 @@ An RTX code for something that matches i
 
 @item RTX_AUTOINC
 An RTX code for an auto-increment addressing mode, such as
-@code{POST_INC}.
+@code{POST_INC}.  @samp{XEXP (@var{x}, 0)} gives the auto-modified
+register.
 
 @item RTX_EXTRA
 All other RTX codes.  This category includes the remaining codes used


Re: [PATCH] C++ thunk section names

2014-09-02 Thread Sriraman Tallam
Ping.



On Wed, Aug 6, 2014 at 2:42 PM, Sriraman Tallam tmsri...@google.com wrote:
 Hi,

 Just wondering if you got a chance to look at this?

 Sri

 On Tue, Jul 8, 2014 at 10:45 AM, Sriraman Tallam tmsri...@google.com wrote:
 On Tue, Jul 8, 2014 at 10:38 AM, Sriraman Tallam tmsri...@google.com wrote:
 On Mon, Jul 7, 2014 at 11:48 AM, Jan Hubicka hubi...@ucw.cz wrote:
 Hello,
 I apologize for taking so long to get into this patch.  I had a busy time
 (wedding
 and teaching), should be back in regular schedule now.

 Sri, can you provide examples to show why putting thunks into the same
 section as the target function when function reorder is on can be bad
 ?

 C++ ABI specify that they are in the same section, but I can't think of the
 case where this would break.
 Hmm, I suppose it is the TARGET_USE_LOCAL_THUNK_ALIAS_P code that breaks -
 you end up with code in two sections where one is accessing local comdat
 of the other. I would also like to see a testcase that breaks and is fixed by
 your patch.  I would expect that here, by not copying the section name,
 you actually make things worse.

 Here is an example where the thunk and the original function get
 placed in different sections.

 class base_class_1
 {
 public:
   virtual void vfn () {}
 };

 class base_class_2
 {
 public:
   virtual void vfn () {}
 };
 void foo();
 class need_thunk_class : public base_class_1, public base_class_2
 {
 public:
   virtual void vfn () {
 for (int i = 0; i  10; ++i)
   foo();
   }
 };

 int main (int argc, char *argv[])
 {
   base_class_1 *bc1 = new need_thunk_class ();
   bc1-vfn();
   return 0;
 }

 int glob = 0;
 __attribute__((noinline))
 void foo()
 {
   glob++;
 }


 I am making the function that needs thunk hot. Now,

 $ g++ thunkex.cc  -O2 -fno-reorder-blocks-and-partition
 -fprofile-generate -ffunction-sections
 $ a.out
 $ g++ thunkex.cc  -O2 -fno-reorder-blocks-and-partition -fprofile-use
 -ffunction-sections -c
 $ objdump -d thunkex.o

 Disassembly of section .text.hot._ZN16need_thunk_class3vfnEv:

  _ZN16need_thunk_class3vfnEv:
0:   53  push   %rbx
1:   bb a0 86 01 00  mov$0x186a0,%ebx
...

 Disassembly of section .text._ZN16need_thunk_class3vfnEv:

  _ZThn8_N16need_thunk_class3vfnEv:
0:   48 83 ef 08 sub$0x8,%rdi


 When the original function gets moved to .text.hot, the thunk does
 not.  It is not always the case that the thunk should either.

 I forgot to add that this becomes confusing because, in this case, the
 thunk is the only function sitting in a section whose name does not
 correspond to its assembler name.  If we are not going to have thunk
 section names the same as the original function when profiles are
 available and -freorder-functions is used, we may as well change the name
 of the thunk's section to correspond to its assembler name. That was
 the intention of the patch.

 Thanks
 Sri



 Thanks
 Sri





 I think we need to deal with this later; use_thunk is done long before
 profiling is read and before we decide whether code is hot/cold.  I suppose
 the function reordering code may need to always walk whole comdat group and
 ensure that sections are same?
 I.e. pick the highest profile of a function in the group, resolve unique 
 section
 on it and then copy section names?  I had verifier checking that section 
 names
 within one comdat groups are same, perhaps it was part of the reverted 
 patch
 for AIX.  I will try to get that one back in now.

 Jan

 Thanks,

 David

 On Thu, Jun 26, 2014 at 10:29 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
  Hi Honza,
 
 Could you review this patch when you find time?
 
  Thanks
  Sri
 
  On Tue, Jun 17, 2014 at 10:42 AM, Sriraman Tallam tmsri...@google.com 
  wrote:
  Ping.
 
  On Mon, Jun 9, 2014 at 3:54 PM, Sriraman Tallam tmsri...@google.com 
  wrote:
  Ping.
 
  On Mon, May 19, 2014 at 11:25 AM, Sriraman Tallam 
  tmsri...@google.com wrote:
  Ping.
 
  On Thu, Apr 17, 2014 at 10:41 AM, Sriraman Tallam 
  tmsri...@google.com wrote:
  Ping.
 
  On Wed, Feb 5, 2014 at 4:31 PM, Sriraman Tallam 
  tmsri...@google.com wrote:
  Hi,
 
I would like this patch reviewed and considered for commit when
  Stage 1 is active again.
 
  Patch Description:
 
  A C++ thunk's section name is set to be the same as the original 
  function's
  section name for which the thunk was created in order to place the 
  two
  together.  This is done in cp/method.c in function use_thunk.
  However, with function reordering turned on, the original 
  function's section
  name can change to something like .text.hot.original or
  .text.unlikely.original in function default_function_section 
  in varasm.c
  based on the node count of that function.  The thunk function's 
  section name
  is not updated to be the same as the original here and also is not 
  always
  correct to do it as the original function can be hotter than the 
  thunk.
 
  I have created 

Re: [PATCH x86_64] Optimize access to globals in -fpie -pie builds with copy relocations

2014-09-02 Thread Sriraman Tallam
Ping.

On Fri, Jul 11, 2014 at 10:42 AM, Sriraman Tallam tmsri...@google.com wrote:
 Ping.

 On Thu, Jun 26, 2014 at 10:54 AM, Sriraman Tallam tmsri...@google.com wrote:
 Hi Uros,

Could you please review this patch?

 Thanks
 Sri

 On Fri, Jun 20, 2014 at 5:17 PM, Sriraman Tallam tmsri...@google.com wrote:
 Patch Updated.

 Sri

 On Mon, Jun 9, 2014 at 3:55 PM, Sriraman Tallam tmsri...@google.com wrote:
 Ping.

 On Mon, May 19, 2014 at 11:11 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 Ping.

 On Thu, May 15, 2014 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 Optimize access to globals with -fpie, x86_64 only:

 Currently, with -fPIE/-fpie, GCC accesses globals that are extern to the 
 module
 using the GOT.  This is two instructions, one to get the address of the 
 global
 from the GOT and the other to get the value.  If it turns out that the 
 global
 gets defined in the executable at link-time, it still needs to go 
 through the
 GOT as it is too late then to generate a direct access.

 Examples:

 foo.cc
 --
 int a_glob;
 int main () {
   return a_glob; // defined in this file
 }

 With -O2 -fpie -pie, the generated code directly accesses the global via
 PC-relative insn:

 5e0   main:
mov0x165a(%rip),%eax# 1c40 a_glob

 foo.cc
 --

 extern int a_glob;
 int main () {
   return a_glob; // defined in this file
 }

 With -O2 -fpie -pie, the generated code accesses global via GOT using two
 memory loads:

 6f0  main:
mov0x1609(%rip),%rax   # 1d00 _DYNAMIC+0x230
mov(%rax),%eax

 This is true even if in the latter case the global was defined in the
 executable through a different file.

 Some experiments on google benchmarks show that the extra memory loads
 affect
 performance by 1% to 5%.


 Solution - Copy Relocations:

 When the linker supports copy relocations, GCC can always assume that the
 global will be defined in the executable.  For globals that are truly 
 extern
 (come from shared objects), the linker will create copy relocations and 
 have
 them defined in the executable. Result is that no global access needs to 
 go
 through the GOT and hence improves performance.

 This patch to the gold linker :
 https://sourceware.org/ml/binutils/2014-05/msg00092.html
 submitted recently allows gold to generate copy relocations for -pie 
 mode when
 necessary.

 I have added option -mld-pie-copyrelocs which when combined with -fpie 
 would do
 this.  Note that the BFD linker does not support pie copyrelocs yet and 
 this
 option cannot be used there.

 Please review.


 ChangeLog:

 * config/i386/i386.opt (mld-pie-copyrelocs): New option.
 * config/i386/i386.c (legitimate_pic_address_disp_p): Check if this
  address is still legitimate in the presence of copy relocations
  and -fpie.
 * testsuite/gcc.target/i386/ld-pie-copyrelocs-1.c: New test.
 * testsuite/gcc.target/i386/ld-pie-copyrelocs-2.c: New test.



 Patch attached.
 Thanks
 Sri


RFA: Merge definitions of get_some_local_dynamic_name

2014-09-02 Thread Richard Sandiford
Several targets define a function like i386's get_some_local_dynamic_name.
The function looks through the current output function and returns the first
(arbitrary) local-dynamic symbol that it finds.  The result can be used in
a call to __tls_get_addr, since all local-dynamic symbols have the same base.

This patch replaces the various target functions with a single generic one.
The only difference between the implementations was that s390 checked
for constant pool references while the others didn't need to (because
they don't allow TLS symbols to be forced into the pool).  Checking for
constant pool references is unnecessary but harmless for the other ports.
Also, the walk is needed only once per TLS-referencing output function,
so it's hardly critical in terms of compile time.

All uses of this function are in final.  In general it wouldn't be
safe to call the function earlier than that, since the symbol reference
could in principle be deleted by any rtl pass.  I've therefore cached
it in a variable local to final rather than in cfun (which is where
the ports used to cache it).

Also, i386 was robust against uses of %& in inline asm.  The patch
makes sure the other ports are too.  Using %& in inline asm would
often be a mistake, but it should at least trigger a proper error
rather than an ICE.

Tested on x86_64-linux-gnu.  Also tested by building cross compilers
before and after the change on:

  alpha-linux-gnu powerpc64-linux-gnu s390x-linux-gnu sparc64-linux-gnu

OK to install?

Thanks,
Richard


gcc/
* output.h (get_some_local_dynamic_name): Declare.
* final.c (some_local_dynamic_name): New variable.
(get_some_local_dynamic_name): New function.
(final_end_function): Clear some_local_dynamic_name.
* config/alpha/alpha.c (machine_function): Remove some_ld_name.
(get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete.
(print_operand): Report an error if '%' is used inappropriately.
* config/i386/i386.c (get_some_local_dynamic_name): Delete.
(get_some_local_dynamic_name_1): Delete.
* config/rs6000/rs6000.c (machine_function): Remove some_ld_name.
(rs6000_get_some_local_dynamic_name): Delete.
(rs6000_get_some_local_dynamic_name_1): Delete.
(print_operand): Report an error if '%' is used inappropriately.
* config/s390/s390.c (machine_function): Remove some_ld_name.
(get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete.
(print_operand): Assert that get_some_local_dynamic_name is nonnull.
* config/sparc/sparc.c: Include rtl-iter.h.
(machine_function): Remove some_ld_name.
(sparc_print_operand): Report an error if '%' is used inappropriately.
(get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete.

Index: gcc/output.h
===
--- gcc/output.h2014-08-31 21:05:04.701330252 +0100
+++ gcc/output.h2014-09-02 19:02:59.820482510 +0100
@@ -52,6 +52,8 @@ extern int get_attr_min_length (rtx);
any branches of variable length if possible.  */
 extern void shorten_branches (rtx_insn *);
 
+const char *get_some_local_dynamic_name ();
+
 /* Output assembler code for the start of a function,
and initialize some of the variables in this file
for the new function.  The label for the function and associated
Index: gcc/final.c
===
--- gcc/final.c 2014-08-31 21:05:04.701330252 +0100
+++ gcc/final.c 2014-09-02 19:17:08.573876805 +0100
@@ -1719,6 +1719,38 @@ reemit_insn_block_notes (void)
   reorder_blocks ();
 }
 
+static const char *some_local_dynamic_name;
+
+/* Locate some local-dynamic symbol still in use by this function
+   so that we can print its name in local-dynamic base patterns.
+   Return null if there are no local-dynamic references.  */
+
+const char *
+get_some_local_dynamic_name ()
+{
+  subrtx_iterator::array_type array;
+  rtx_insn *insn;
+
+  if (some_local_dynamic_name)
+return some_local_dynamic_name;
+
+  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+if (NONDEBUG_INSN_P (insn))
+  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+   {
+ const_rtx x = *iter;
+ if (GET_CODE (x) == SYMBOL_REF)
+   {
+ if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+   return some_local_dynamic_name = XSTR (x, 0);
+ if (CONSTANT_POOL_ADDRESS_P (x))
+   iter.substitute (get_pool_constant (x));
+   }
+   }
+
+  return 0;
+}
+
 /* Output assembler code for the start of a function,
and initialize some of the variables in this file
for the new function.  The label for the function and associated
@@ -1904,6 +1936,8 @@ final_end_function (void)
   if (!dwarf2_debug_info_emitted_p (current_function_decl)
dwarf2out_do_frame ())
 

Re: [4.9] PR 62146

2014-09-02 Thread Easwaran Raman
It turns out that the REG_EQUAL note is removed on a hoisted
instruction (relevant code is in dead_or_predicable in ifcvt.c) if the
source of the move instruction is not a function invariant. In this
case, the source is a function invariant (constant) and so that
doesn't kick in. I don't understand why this exemption for function
invariant is there and the original thread in
https://gcc.gnu.org/ml/gcc/2005-05/msg01710.html doesn't explain
either. Should I just remove the REG_EQUAL notes of all hoisted
instructions or are there cases where it is safe to leave the note?

Thanks,
Easwaran



On Fri, Aug 29, 2014 at 1:06 PM, Jeff Law l...@redhat.com wrote:
 On 08/25/14 16:42, Easwaran Raman wrote:

 This patch deletes REG_EQUAL note when a src register is replaced by a
 constant in an assignment. This is to prevent spurious equivalences
 between the constant and the expression in the REG_EQUAL note. In the
 bug reported in PR 62146, an assignment in one branch (which is
 actually dead) of an IF statement has a REG_EQUAL note equating a
 register with an expression. Conditional copy propagation replaces the
 register with 0. The instruction is hoisted above the branch
 subsequently and then the value 0 is equated with the expression in
 the REG_EQUAL. Is this ok for 4.9 branch if all tests pass?

 This patch looks applicable to trunk as well, but I don't have a test
 case to reproduce the issue in trunk.

 Something doesn't feel right with this patch.  It seems to me the real
 problem is when we hoist the insn with the note.  If the equivalence
 implied by the note is no longer valid at the insn's new location, then the
 note needs to be removed.

 Now determining if the note is no longer valid at the new location may prove
 difficult ;-)  You'd probably have to know why the note was created, how it
 was changed, etc.  So I suspect the right thing to do is just remove
 REG_EQUAL notes on any insns we hoist in this manner.

 Jeff


Re: [FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)

2014-09-02 Thread Thomas Koenig
Am 02.09.2014 17:32, schrieb Tobias Burnus:
 
 Marek Polacek wrote:
 This patch fixes the last two spots where -Wlogical-not-parentheses
 warns.  See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3
 if you want more info about the changes.

 Bootstrapped/regtested on x86_64-linux, ok for trunk?
 
 Looks good to me. Thanks for the patch!

As this commit fixes obvious errors for not-so-obvious
cases, what about a backport?

Thomas



Re: [PATCH x86_64] Optimize access to globals in -fpie -pie builds with copy relocations

2014-09-02 Thread Richard Henderson
On 06/20/2014 05:17 PM, Sriraman Tallam wrote:
 Index: config/i386/i386.c
 ===
 --- config/i386/i386.c(revision 211826)
 +++ config/i386/i386.c(working copy)
 @@ -12691,7 +12691,9 @@ legitimate_pic_address_disp_p (rtx disp)
   return true;
   }
 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
 - SYMBOL_REF_LOCAL_P (op0)
 + (SYMBOL_REF_LOCAL_P (op0)
 +|| (TARGET_64BIT  ix86_copyrelocs  flag_pie
 + !SYMBOL_REF_FUNCTION_P (op0)))
   ix86_cmodel != CM_LARGE_PIC)
   return true;
 break;

This is the wrong place to patch.

You ought to be adjusting SYMBOL_REF_LOCAL_P, by providing a modified
TARGET_BINDS_LOCAL_P.

Note in particular that I believe that you are doing the wrong thing with weak
and COMMON symbols, in that you probably ought not force a copy reloc there.

Note the complexity of default_binds_local_p_1, and the fact that all you
really want to modify is

  /* If PIC, then assume that any global name can be overridden by
 symbols resolved from other modules.  */
  else if (shlib)
local_p = false;

near the bottom of that function.


r~


Re: Ping^2 - RE: [PATCH] Add target hook to override DWARF2 frame register size

2014-09-02 Thread Richard Henderson
On 09/02/2014 01:59 AM, Matthew Fortune wrote:
  gcc/
 * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook.
 * targhooks.c (default_dwarf_frame_reg_mode): New function.
 * targhooks.h (default_dwarf_frame_reg_mode): New prototype.
 * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document.
 * doc/tm.texi: Regenerate.
 * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode
 selection logic to default_dwarf_frame_reg_mode.

Ok.


r~


[PATCH] PR61889

2014-09-02 Thread Rong Xu
This patch makes the build of gcov-tool configurable. It checks if
ftw.h is available. For mingw build, it provides ftw functionality by
using FindFirstFile/FindNextFile/FindClose API.

Tested with and without --disable-gcov-tool.

Thanks,

-Rong
2014-09-02  Rong Xu  x...@google.com

* gcc/Makefile.in: Make the build gcov-tool configurable.
* gcc/configure.ac: Ditto.
* gcc/configure: Ditto.
* gcc/config.in: Ditto.
* gcc/gcov-tool.c (unlink_gcda_file): Support win32 build.
(unlink_profile_dir): Ditto.
* libgcc/libgcov-util.c (read_gcda_file): Ditto.
(read_file_handler): Ditto.
(ftw_read_file): Ditto.
(myftw): Ditto.
(gcov_read_profile_dir): Ditto.
(gcov_profile_normalize): Ditto.

Index: gcc/Makefile.in
===
--- gcc/Makefile.in (revision 214831)
+++ gcc/Makefile.in (working copy)
@@ -123,9 +123,13 @@ SUBDIRS =@subdirs@ build
 
 # Selection of languages to be made.
 CONFIG_LANGUAGES = @all_selected_languages@
-LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) gcov-tool$(exeext) \
-$(CONFIG_LANGUAGES)
+LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) $(CONFIG_LANGUAGES)
 
+disable_gcov_tool = @disable_gcov_tool@
+ifneq ($(disable_gcov_tool),yes)
+LANGUAGES += gcov-tool$(exeext)
+endif
+
 # Default values for variables overridden in Makefile fragments.
 # CFLAGS is for the user to override to, e.g., do a cross build with -O2.
 # TCFLAGS is used for compilations with the GCC just built.
Index: gcc/configure.ac
===
--- gcc/configure.ac(revision 214831)
+++ gcc/configure.ac(working copy)
@@ -5650,6 +5650,26 @@ if test ${ENABLE_LIBQUADMATH_SUPPORT} != no ;
 fi
 
 
+# Check if gcov-tool can be built.
+AC_ARG_ENABLE(gcov-tool,
+[AS_HELP_STRING([--disable-gcov-tool],
+[disable the build of gcov-tool])])
+if test x$enable_gcov_tool = xno; then
+  disable_gcov_tool=yes
+else
+  AC_CHECK_HEADERS(ftw.h, [disable_gcov_tool=no],
+   [case $host_os in
+  win32 | cygwin* | mingw32*)
+disable_gcov_tool=no
+;;
+  *)
+disable_gcov_tool=yes
+;;
+esac])
+fi
+AC_SUBST(disable_gcov_tool)
+
+
 # Specify what hash style to use by default.
 AC_ARG_WITH([linker-hash-style],
 [AC_HELP_STRING([--with-linker-hash-style={sysv,gnu,both}],
Index: gcc/configure
===
--- gcc/configure   (revision 214831)
+++ gcc/configure   (working copy)
@@ -600,6 +600,7 @@ ac_includes_default=\
 
 ac_subst_vars='LTLIBOBJS
 LIBOBJS
+disable_gcov_tool
 PICFLAG
 enable_host_shared
 enable_plugin
@@ -932,6 +933,7 @@ enable_version_specific_runtime_libs
 enable_plugin
 enable_host_shared
 enable_libquadmath_support
+enable_gcov_tool
 with_linker_hash_style
 '
   ac_precious_vars='build_alias
@@ -1655,6 +1657,7 @@ Optional Features:
   --enable-host-sharedbuild host code as shared libraries
   --disable-libquadmath-support
   disable libquadmath support for Fortran
+  --disable-gcov-tool disable the build of gcov-tool
 
 Optional Packages:
   --with-PACKAGE[=ARG]use PACKAGE [ARG=yes]
@@ -8353,7 +8356,7 @@ fi
 for ac_header in limits.h stddef.h string.h strings.h stdlib.h time.h iconv.h \
 fcntl.h unistd.h sys/file.h sys/time.h sys/mman.h \
 sys/resource.h sys/param.h sys/times.h sys/stat.h \
-direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h
+direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h ftw.h
 do :
   as_ac_Header=`$as_echo ac_cv_header_$ac_header | $as_tr_sh`
 ac_fn_c_check_header_preproc $LINENO $ac_header $as_ac_Header
@@ -18033,7 +18036,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat  conftest.$ac_ext _LT_EOF
-#line 18036 configure
+#line 18039 configure
 #include confdefs.h
 
 #if HAVE_DLFCN_H
@@ -18139,7 +18142,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat  conftest.$ac_ext _LT_EOF
-#line 18142 configure
+#line 18145 configure
 #include confdefs.h
 
 #if HAVE_DLFCN_H
@@ -28116,6 +28119,33 @@ $as_echo #define ENABLE_LIBQUADMATH_SUPPORT 1 
 fi
 
 
+# Check if gcov-tool can be built.
+# Check whether --enable-gcov-tool was given.
+if test ${enable_gcov_tool+set} = set; then :
+  enableval=$enable_gcov_tool;
+fi
+
+if test x$enable_gcov_tool = xno; then
+  disable_gcov_tool=yes
+else
+  ac_fn_c_check_header_preproc $LINENO ftw.h ac_cv_header_ftw_h
+if test x$ac_cv_header_ftw_h = xyes; then :
+  disable_gcov_tool=no
+else
+  case $host_os in
+  win32 | cygwin* | mingw32*)
+disable_gcov_tool=no
+   

Re: [PATCH] PowerPC: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV

2014-09-02 Thread Adhemerval Zanella
Ping.

On 19-08-2014 13:54, Adhemerval Zanella wrote:
 Ping.

 On 06-08-2014 17:21, Adhemerval Zanella wrote:
 On 01-08-2014 12:31, Joseph S. Myers wrote:
 On Thu, 31 Jul 2014, David Edelsohn wrote:

 Thanks for implementing the FENV support.  The patch generally looks 
 good to me.

 My one concern is a detail in the implementation of update. I do not
 have enough experience with GENERIC to verify the details and it seems
 like it is missing building an outer COMPOUND_EXPR containing
 update_mffs and the CALL_EXPR for update mtfsf.
 I suppose what's actually odd there is that you have

 +  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, 
 call_mffs);
 +
 +  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs);

 so you build a MODIFY_EXPR in void_type_node but then convert it with a 
 VIEW_CONVERT_EXPR.  If you'd built the MODIFY_EXPR in double_type_node 
 then the VIEW_CONVERT_EXPR would be meaningful (the value of an assignment 
 a = b being the new value of a), but reinterpreting a void value doesn't 
 make sense.  Or you could probably just use call_mffs directly in the 
 VIEW_CONVERT_EXPR without explicitly creating the old_fenv variable.

 Thanks for the review, Joseph.  I have changed to avoid the void
 reinterpretation
 and use call_mffs directly.  I have also removed the mask generation in
 'clear'
 from your previous message, it is now reusing the mask used in feholdexcept.
 The
 testcase patch is the same as before.

 Checked on both linux-powerpc64/powerpc64le and no regressions found.

 --

 2014-08-06  Adhemerval Zanella  azane...@linux.vnet.ibm.com

 gcc:
  * config/rs6000/rs6000.c (rs6000_atomic_assign_expand_fenv): New
  function.

 gcc/testsuite:
  * gcc.dg/atomic/c11-atomic-exec-5.c
  (test_main_long_double_add_overflow): Define and run only for
  LDBL_MANT_DIG != 106.
  (test_main_complex_long_double_add_overflow): Likewise.
  (test_main_long_double_sub_overflow): Likewise.
  (test_main_complex_long_double_sub_overflow): Likewise.

 ---

 diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
 index d088ff6..7d66eb1 100644
 --- a/gcc/config/rs6000/rs6000.c
 +++ b/gcc/config/rs6000/rs6000.c
 @@ -1631,6 +1631,9 @@ static const struct attribute_spec 
 rs6000_attribute_table[] =

  #undef TARGET_CAN_USE_DOLOOP_P
  #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 +
 +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
  

  /* Processor table.  */
 @@ -7,6 +33340,80 @@ emit_fusion_gpr_load (rtx *operands)
return ;
  }

 +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
 +
 +static void
 +rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
 +{
 +  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
 +return;
 +
 +  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
 +  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
 +  tree call_mffs = build_call_expr (mffs, 0);
 +
 +  /* Generates the equivalent of feholdexcept (fenv_var)
 +
 + *fenv_var = __builtin_mffs ();
 + double fenv_hold;
 + *(uint64_t*)fenv_hold = *(uint64_t*)fenv_var  0x0007LL;
 + __builtin_mtfsf (0xff, fenv_hold);  */
 +
 +  /* Mask to clear everything except for the rounding modes and non-IEEE
 + arithmetic flag.  */
 +  const unsigned HOST_WIDE_INT hold_exception_mask =
 +HOST_WIDE_INT_UC (0x0007);
 +
 +  tree fenv_var = create_tmp_var (double_type_node, NULL);
 +
 +  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, 
 call_mffs);
 +
 +  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
 +  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
 +build_int_cst (uint64_type_node, hold_exception_mask));
 +
 +  tree fenv_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, 
 fenv_llu_and);
 +
 +  tree hold_mtfsf = build_call_expr (mtfsf, 2,
 +build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf);
 +
 +  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
 +
 +  /* Reload the value of fenv_hold to clear the exceptions.  */
 +
 +  *clear = build_call_expr (mtfsf, 2,
 +build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf);
 +
 +  /* Generates the equivalent of feupdateenv (fenv_var)
 +
 + double old_fenv = __builtin_mffs ();
 + double fenv_update;
 + *(uint64_t*)fenv_update = (*(uint64_t*)old  0x1f00LL) |
 +(*(uint64_t*)fenv_var 0x1ff80fff);
 + __builtin_mtfsf (0xff, fenv_update);  */
 +
 +  const unsigned HOST_WIDE_INT update_exception_mask =
 +HOST_WIDE_INT_UC (0x1f00);
 +  const unsigned HOST_WIDE_INT new_exception_mask =
 +HOST_WIDE_INT_UC (0x1ff80fff);
 +
 +  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, call_mffs);
 +  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
 +old_llu, 

Re: [C PATCH] Don't reject valid code with _Alignas (PR c/61053)

2014-09-02 Thread H.J. Lu
On Thu, May 8, 2014 at 11:19 AM, Marek Polacek pola...@redhat.com wrote:
 On Wed, May 07, 2014 at 11:31:38AM -0700, H.J. Lu wrote:
  OK, though I'm not sure if the lp64 conditions are right in the testcase

 It should be !ia32 instead of lp64.

 Ok, I changed lp64 to ! { ia32 } and committed the patch now.

 Marek

The change is insufficient for x32, which has the same alignments
for floating point types and the integer types with the same size as
x86-64.  This patch is needed for x32.  OK for trunk and 4.8 branch?


-- 
H.J.
---
2014-09-02  H.J. Lu  hongjiu...@intel.com

* gcc.dg/pr61053.c: Updated for x32.

diff --git a/gcc/testsuite/gcc.dg/pr61053.c b/gcc/testsuite/gcc.dg/pr61053.c
index 4fd5319..5557784 100644
--- a/gcc/testsuite/gcc.dg/pr61053.c
+++ b/gcc/testsuite/gcc.dg/pr61053.c
@@ -31,17 +31,17 @@ _Alignas (long double) int ild;

 _Alignas (char) long int lic; /* { dg-error cannot reduce alignment } */
 _Alignas (short int) long int lis; /* { dg-error cannot reduce alignment } */
-_Alignas (int) long int lii; /* { dg-error cannot reduce alignment
 { target { ! { ia32 } } } } */
+_Alignas (int) long int lii; /* { dg-error cannot reduce alignment
 { target { ! { ia32 || x32 } } } } */
 _Alignas (long int) long int lil;
 _Alignas (long long int) long int lill;
-_Alignas (float) long int lif; /* { dg-error cannot reduce
alignment  { target { ! { ia32 } } } } */
+_Alignas (float) long int lif; /* { dg-error cannot reduce
alignment  { target { ! { ia32 || x32 } } } } */
 _Alignas (double) long int lid;
 _Alignas (long double) long int lild;

 _Alignas (char) long long int llic; /* { dg-error cannot reduce
alignment } */
 _Alignas (short int) long long int llis; /* { dg-error cannot reduce
alignment } */
 _Alignas (int) long long int llii; /* { dg-error cannot reduce
alignment  { target { ! { ia32 } } } } */
-_Alignas (long int) long long int llil;
+_Alignas (long int) long long int llil; /* { dg-error cannot reduce
alignment  { target { x32 } } } */
 _Alignas (long long int) long long int llill;
 _Alignas (float) long long int llif; /* { dg-error cannot reduce
alignment  { target { ! { ia32 } } } } */
 _Alignas (double) long long int llid;
@@ -59,7 +59,7 @@ _Alignas (long double) float fld;
 _Alignas (char) double dc; /* { dg-error cannot reduce alignment } */
 _Alignas (short int) double ds; /* { dg-error cannot reduce alignment } */
 _Alignas (int) double di; /* { dg-error cannot reduce alignment 
{ target { ! { ia32 } } } } */
-_Alignas (long int) double dl;
+_Alignas (long int) double dl; /* { dg-error cannot reduce
alignment  { target { x32 } } } */
 _Alignas (long long int) double dll;
 _Alignas (float) double df; /* { dg-error cannot reduce alignment
 { target { ! { ia32 } } } } */
 _Alignas (double) double dd;


Re: [C PATCH] Don't reject valid code with _Alignas (PR c/61053)

2014-09-02 Thread H.J. Lu
On Tue, Sep 2, 2014 at 3:29 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 8, 2014 at 11:19 AM, Marek Polacek pola...@redhat.com wrote:
 On Wed, May 07, 2014 at 11:31:38AM -0700, H.J. Lu wrote:
  OK, though I'm not sure if the lp64 conditions are right in the testcase

 It should be !ia32 instead of lp64.

 Ok, I changed lp64 to ! { ia32 } and committed the patch now.

 Marek

 The change is insufficient for x32, which has the same alignments
 for floating point types and the integer types with the same size as
 x86-64.  This patch is needed for x32.  OK for trunk and 4.8 branch?


 --
 H.J.
 ---
 2014-09-02  H.J. Lu  hongjiu...@intel.com

 * gcc.dg/pr61053.c: Updated for x32.


Here is the patch as an attachment.


-- 
H.J.
2014-09-02  H.J. Lu  hongjiu...@intel.com

	* gcc.dg/pr61053.c: Updated for x32.

diff --git a/gcc/testsuite/gcc.dg/pr61053.c b/gcc/testsuite/gcc.dg/pr61053.c
index 4fd5319..5557784 100644
--- a/gcc/testsuite/gcc.dg/pr61053.c
+++ b/gcc/testsuite/gcc.dg/pr61053.c
@@ -31,17 +31,17 @@ _Alignas (long double) int ild;
 
 _Alignas (char) long int lic; /* { dg-error cannot reduce alignment } */
 _Alignas (short int) long int lis; /* { dg-error cannot reduce alignment } */
-_Alignas (int) long int lii; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
+_Alignas (int) long int lii; /* { dg-error cannot reduce alignment  { target { ! { ia32 || x32 } } } } */
 _Alignas (long int) long int lil;
 _Alignas (long long int) long int lill;
-_Alignas (float) long int lif; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
+_Alignas (float) long int lif; /* { dg-error cannot reduce alignment  { target { ! { ia32 || x32 } } } } */
 _Alignas (double) long int lid;
 _Alignas (long double) long int lild;
 
 _Alignas (char) long long int llic; /* { dg-error cannot reduce alignment } */
 _Alignas (short int) long long int llis; /* { dg-error cannot reduce alignment } */
 _Alignas (int) long long int llii; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
-_Alignas (long int) long long int llil;
+_Alignas (long int) long long int llil; /* { dg-error cannot reduce alignment  { target { x32 } } } */
 _Alignas (long long int) long long int llill;
 _Alignas (float) long long int llif; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
 _Alignas (double) long long int llid;
@@ -59,7 +59,7 @@ _Alignas (long double) float fld;
 _Alignas (char) double dc; /* { dg-error cannot reduce alignment } */
 _Alignas (short int) double ds; /* { dg-error cannot reduce alignment } */
 _Alignas (int) double di; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
-_Alignas (long int) double dl;
+_Alignas (long int) double dl; /* { dg-error cannot reduce alignment  { target { x32 } } } */
 _Alignas (long long int) double dll;
 _Alignas (float) double df; /* { dg-error cannot reduce alignment  { target { ! { ia32 } } } } */
 _Alignas (double) double dd;


[PATCH 1/2] use rtx_code_label more

2014-09-02 Thread tsaunders
From: Trevor Saunders tsaund...@mozilla.com

Hi,

$subject

bootstrapped + regtested on x86_64-unknown-linux-gnu, and run through
config-list.mk.  Will commit it shortly as preapproved by Jeff in
http://gcc.gnu.org/ml/gcc-patches/2014-08/msg01310.html

Trev

gcc/

* asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c,
config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md,
config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c,
config/mips/mips.md, config/nios2/nios2.c, config/pa/pa.c,
config/s390/s390.c, config/s390/s390.md, config/sh/sh-mem.cc,
config/sh/sh.c, config/sparc/sparc.c, dojump.c, function.c, optabs.c,
stmt.c: Assign the result of gen_label_rtx to rtx_code_label * instead
of rtx.
---
 gcc/ChangeLog   | 11 +++
 gcc/asan.c  |  7 ---
 gcc/cfgexpand.c |  4 ++--
 gcc/config/alpha/alpha.md   |  4 ++--
 gcc/config/arm/arm.c|  6 +++---
 gcc/config/epiphany/epiphany.md |  2 +-
 gcc/config/h8300/h8300.c|  4 ++--
 gcc/config/i386/i386.md | 16 
 gcc/config/m32r/m32r.c  |  2 +-
 gcc/config/mcore/mcore.md   |  4 ++--
 gcc/config/mips/mips.c  | 11 ++-
 gcc/config/mips/mips.md | 16 
 gcc/config/nios2/nios2.c|  2 +-
 gcc/config/pa/pa.c  |  2 +-
 gcc/config/s390/s390.c  | 24 
 gcc/config/s390/s390.md | 28 ++--
 gcc/config/sh/sh-mem.cc | 34 +-
 gcc/config/sh/sh.c  |  9 +
 gcc/config/sparc/sparc.c| 20 ++--
 gcc/dojump.c|  8 
 gcc/function.c  |  2 +-
 gcc/optabs.c|  5 ++---
 gcc/stmt.c  |  6 +++---
 23 files changed, 120 insertions(+), 107 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 22e69c0..3144019 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
 2014-09-02  Trevor Saunders  tsaund...@mozilla.com
 
+   * asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c,
+   config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md,
+   config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c,
+   config/mips/mips.md, config/nios2/nios2.c, config/pa/pa.c,
+   config/s390/s390.c, config/s390/s390.md, config/sh/sh-mem.cc,
+   config/sh/sh.c, config/sparc/sparc.c, dojump.c, function.c, optabs.c,
+   stmt.c: Assign the result of gen_label_rtx to rtx_code_label * instead
+   of rtx.
+
+2014-09-02  Trevor Saunders  tsaund...@mozilla.com
+
* alloc-pool.c: Include coretypes.h.
* cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c,
function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and
diff --git a/gcc/asan.c b/gcc/asan.c
index 4ed9344..cf5de27 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -983,7 +983,8 @@ rtx_insn *
 asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
HOST_WIDE_INT *offsets, tree *decls, int length)
 {
-  rtx shadow_base, shadow_mem, ret, mem, orig_base, lab;
+  rtx shadow_base, shadow_mem, ret, mem, orig_base;
+  rtx_code_label *lab;
   rtx_insn *insns;
   char buf[30];
   unsigned char shadow_bytes[4];
@@ -1174,10 +1175,10 @@ asan_emit_stack_protection (rtx base, rtx pbase, 
unsigned int alignb,
   /* Construct epilogue sequence.  */
   start_sequence ();
 
-  lab = NULL_RTX;  
+  lab = NULL;
   if (use_after_return_class != -1)
 {
-  rtx lab2 = gen_label_rtx ();
+  rtx_code_label *lab2 = gen_label_rtx ();
   char c = (char) ASAN_STACK_MAGIC_USE_AFTER_RET;
   int very_likely = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
   emit_cmp_and_jump_insns (orig_base, base, EQ, NULL_RTX,
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 0a1b4bf..7d11b1b 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2007,7 +2007,7 @@ label_rtx_for_bb (basic_block bb ATTRIBUTE_UNUSED)
   return label_rtx (lab);
 }
 
-  rtx l = gen_label_rtx ();
+  rtx_code_label *l = gen_label_rtx ();
   lab_rtx_for_bb-put (bb, l);
   return l;
 }
@@ -2469,7 +2469,7 @@ expand_asm_operands (tree string, tree outputs, tree 
inputs,
   enum machine_mode *inout_mode = XALLOCAVEC (enum machine_mode, noutputs);
   const char **constraints = XALLOCAVEC (const char *, noutputs + ninputs);
   int old_generating_concat_p = generating_concat_p;
-  rtx fallthru_label = NULL_RTX;
+  rtx_code_label *fallthru_label = NULL;
 
   /* An ASM with no outputs needs to be treated as volatile, for now.  */
   if (noutputs == 0)
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 34ff1f0..8cf8a06 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -4907,8 +4907,8 @@
 }
   else
 {
-  rtx out_label = 0;

[PATCH 2/2] use rtx_code_label in lab_rtx_for_bb hash map

2014-09-02 Thread tsaunders
From: Trevor Saunders tsaund...@mozilla.com

Hi,

$subject again

bootstrapped + regtested on x86_64-unknown-linux-gnu, and run through
config-list.mk.  Will commit it shortly as preapproved by Jeff in
http://gcc.gnu.org/ml/gcc-patches/2014-08/msg01310.html

Trev

gcc/ChangeLog:

* cfgexpand.c (label_rtx_for_bb): Change type to
hash_map<basic_block, rtx_code_label *> *.
(expand_gimple_basic_block): Adjust.
(pass_expand::execute): Likewise.
---
 gcc/ChangeLog   | 7 +++
 gcc/cfgexpand.c | 8 
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3144019..ca786c2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
 2014-09-02  Trevor Saunders  tsaund...@mozilla.com
 
+   * cfgexpand.c (label_rtx_for_bb): Change type to
+   hash_mapbasic_block, rtx_code_label * *.
+   (expand_gimple_basic_block): Adjust.
+   (pass_expand::execute): Likewise.
+
+2014-09-02  Trevor Saunders  tsaund...@mozilla.com
+
* asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c,
config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md,
config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c,
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 7d11b1b..db76897 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1974,7 +1974,7 @@ maybe_dump_rtl_for_gimple_stmt (gimple stmt, rtx_insn 
*since)
 
 /* Maps the blocks that do not contain tree labels to rtx labels.  */
 
-static hash_mapbasic_block, rtx *lab_rtx_for_bb;
+static hash_mapbasic_block, rtx_code_label * *lab_rtx_for_bb;
 
 /* Returns the label_rtx expression for a label starting basic block BB.  */
 
@@ -1988,7 +1988,7 @@ label_rtx_for_bb (basic_block bb ATTRIBUTE_UNUSED)
   if (bb-flags  BB_RTL)
 return block_label (bb);
 
-  rtx *elt = lab_rtx_for_bb-get (bb);
+  rtx_code_label **elt = lab_rtx_for_bb-get (bb);
   if (elt)
 return *elt;
 
@@ -4945,7 +4945,7 @@ expand_gimple_basic_block (basic_block bb, bool 
disable_tail_calls)
stmt = NULL;
 }
 
-  rtx *elt = lab_rtx_for_bb-get (bb);
+  rtx_code_label **elt = lab_rtx_for_bb-get (bb);
 
   if (stmt || elt)
 {
@@ -5815,7 +5815,7 @@ pass_expand::execute (function *fun)
   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (fun)-succs)
 e-flags = ~EDGE_EXECUTABLE;
 
-  lab_rtx_for_bb = new hash_mapbasic_block, rtx;
+  lab_rtx_for_bb = new hash_mapbasic_block, rtx_code_label *;
   FOR_BB_BETWEEN (bb, init_block-next_bb, EXIT_BLOCK_PTR_FOR_FN (fun),
  next_bb)
 bb = expand_gimple_basic_block (bb, var_ret_seq != NULL_RTX);
-- 
2.1.0



[RFC] Tweak gcc.c-torture/execute/pr39228.c

2014-09-02 Thread Kaz Kojima
Hi,

gcc.c-torture/execute/pr39228.c fails with (test for excess errors)
on SH for recent revisions.  My gcc.log says:

gcc.c-torture/execute/pr39228.c:20:43: warning: always_inline function might 
not be inlinable [-Wattributes]
...

It looks like alpha has a similar issue:
https://gcc.gnu.org/ml/gcc-testresults/2014-08/msg02660.html

alpha and sh redefine dg-options to -mieee in the test case
instead of the default dg-options -w and get the above warning.
The patch below tweaks the test to fix it.  Perhaps the first
two lines are enough to avoid the error but avoiding the root cause
of warnings would be better.  Tested on i686-linux and sh4-linux.

Regards,
kaz
--
* gcc.c-torture/execute/pr39228.c: Add -w option on sh*-*-*
and alpha*-*-*.  Add inline keyword to test functions.

--- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c2014-08-26 
09:26:20.0 +0900
+++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-09-03 
07:42:30.085524983 +0900
@@ -1,23 +1,23 @@
-/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */
+/* { dg-options -w -mieee { target sh*-*-* alpha*-*-* } } */
 /* { dg-skip-if No Inf/NaN support { spu-*-* } *  } */
 
 extern void abort (void);
 
-static int __attribute__((always_inline)) testf (float b)
+static inline int __attribute__((always_inline)) testf (float b)
 {
   float c = 1.01f * b;
 
   return __builtin_isinff (c);
 }
 
-static int __attribute__((always_inline)) test (double b)
+static inline int __attribute__((always_inline)) test (double b)
 {
   double c = 1.01 * b;
 
   return __builtin_isinf (c);
 }
 
-static int __attribute__((always_inline)) testl (long double b)
+static inline int __attribute__((always_inline)) testl (long double b)
 {
   long double c = 1.01L * b;
 


Re: fix gcov regression

2014-09-02 Thread Nathan Sidwell

On 08/23/14 10:33, Nathan Sidwell wrote:

Hi,
this patch fixes a defect Jan found with firefox and its shared objects.  We
were inadvertently calling an externally visible and overridable symbol, rather
than the local shared object's instance.  This led to strangely sparse gcov
results.

I've taken the STRONG_ALIAS #define from glibc.  I'm not 100% sure it's valid
for all supported targets.

Tested in x86_64-linux

I've not committed this patch because (a) of that, and (b) I'm about to emigrate,
so I'm likely to be unable to respond to any potential fallout in a timely manner.


I've committed the patch now.



Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass

2014-09-02 Thread Bin.Cheng
On Tue, Sep 2, 2014 at 9:40 PM, Segher Boessenkool
seg...@kernel.crashing.org wrote:
 On Tue, Sep 02, 2014 at 02:10:32PM +0200, Ulrich Weigand wrote:
 In any case, this test in can_combine_p rejects a combination for *two*
 different issues.  One is the earlyclobber problem, which is what that
 2004 thread was about, and which my patch back then relaxed for fixed
 hard register.

 However, this doesn't seem to apply to the example above; that is really
 about the second problem: don't substitute into a clobber.

 Right.

 I understand the reason why this particular substitution is rejected is
 simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1)
 into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is
 invalid RTL.

 I checked, and that is indeed what combine does.  How silly.

 Now I guess this check could be relaxed if somewhere else in combine we'd
 recognize the substitution into a clobber and simply omit it in that case.

 Yeah.

 In the testcase, combine tries combining 76,77 (77 is that clobbering
 insn) and refuses it; then it tries 32,76,77 and refuses it; and then
 it tries 32,76,77,43 and allows it (it doesn't do this check at all,
 77 is not i3, combine omits the clobber completely).  Which is inconsistent.

I guess it makes sense because this way it doesn't introduce any
invalid instructions.  But yes, how combine handles the clobber in
this way may help combine the three instructions?

Thanks,
bin

 What a mess.  Thanks for looking!


 Segher


Re: [RFC] Tweak gcc.c-torture/execute/pr39228.c

2014-09-02 Thread Oleg Endo
Hi,

On Sep 3, 2014, at 2:42 AM, Kaz Kojima kkoj...@rr.iij4u.or.jp wrote:

 Hi,
 
 gcc.c-torture/execute/pr39228.c fails with (test for excess errors)
 on SH for recent revisions.  My gcc.log says:
 
 gcc.c-torture/execute/pr39228.c:20:43: warning: always_inline function might 
 not be inlinable [-Wattributes]
 ...
 
 It looks like alpha has a similar issue:
 https://gcc.gnu.org/ml/gcc-testresults/2014-08/msg02660.html
 
 alpha and sh redefine dg-options to -mieee in the test case
 instead of the default dg-options -w and get the above warning.
 The patch below tweaks the test to fix it.  Perhaps the first
 two lines are enough to avoid the error but avoiding the root cause
 of warnings would be better.  Tested on i686-linux and sh4-linux.

-mieee should be the default on sh* and thus can be removed from the dg-options 
line, or is it not?  If -mieee is still needed (for alpha) maybe it's better to 
use dg-additional-options instead?

Cheers,
Oleg



 Regards,
kaz
 --
* gcc.c-torture/execute/pr39228.c: Add -w option on sh*-*-*
and alpha*-*-*.  Add inline keyword to test functions.
 
 --- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c2014-08-26 
 09:26:20.0 +0900
 +++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c2014-09-03 
 07:42:30.085524983 +0900
 @@ -1,23 +1,23 @@
 -/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */
 +/* { dg-options -w -mieee { target sh*-*-* alpha*-*-* } } */
 /* { dg-skip-if No Inf/NaN support { spu-*-* } *  } */
 
 extern void abort (void);
 
 -static int __attribute__((always_inline)) testf (float b)
 +static inline int __attribute__((always_inline)) testf (float b)
 {
   float c = 1.01f * b;
 
   return __builtin_isinff (c);
 }
 
 -static int __attribute__((always_inline)) test (double b)
 +static inline int __attribute__((always_inline)) test (double b)
 {
   double c = 1.01 * b;
 
   return __builtin_isinf (c);
 }
 
 -static int __attribute__((always_inline)) testl (long double b)
 +static inline int __attribute__((always_inline)) testl (long double b)
 {
   long double c = 1.01L * b;
 


Re: [RFC] Tweak gcc.c-torture/execute/pr39228.c

2014-09-02 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 -mieee should be the default on sh* and thus can be removed from the 
 dg-options line, or is it not?  If -mieee is still needed (for alpha) maybe 
 it's better to use dg-additional-options instead?

Sure.  The attached is a revised one.

Regards,
kaz
--
* gcc.c-torture/execute/pr39228.c: Use dg-additional-options
instead of dg-options and remove sh*-*-* from its target list.
Add inline keyword to test functions.

--- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c2014-08-26 
09:26:20.0 +0900
+++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-09-03 
14:16:23.313631715 +0900
@@ -1,23 +1,23 @@
-/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */
+/* { dg-additional-options -mieee { target alpha*-*-* } } */
 /* { dg-skip-if No Inf/NaN support { spu-*-* } *  } */
 
 extern void abort (void);
 
-static int __attribute__((always_inline)) testf (float b)
+static inline int __attribute__((always_inline)) testf (float b)
 {
   float c = 1.01f * b;
 
   return __builtin_isinff (c);
 }
 
-static int __attribute__((always_inline)) test (double b)
+static inline int __attribute__((always_inline)) test (double b)
 {
   double c = 1.01 * b;
 
   return __builtin_isinf (c);
 }
 
-static int __attribute__((always_inline)) testl (long double b)
+static inline int __attribute__((always_inline)) testl (long double b)
 {
   long double c = 1.01L * b;
 


  1   2   >