Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly

2014-10-13 Thread Richard Biener
On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com wrote:
 Hi,

 The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
 high branch cost.
 Bootstrap and make check are in progress.
 The patch boosts (by up to 2.5 times) several benchmarks compiled
 with -Ofast on Silvermont.
 Spec2000:
 +5% gain on 173.applu
 +1% gain on 255.vortex

 Is it OK for trunk once it passes bootstrap and make check?

This is only a 20% increase - from 100 to 120.  I would instead suggest
to explore doing this change unconditionally if it helps that much.

Richard.

 Thanks,
 Evgeny

 2014-10-10  Evgeny Stupachenko  evstu...@gmail.com
 * config/i386/i386.c (ix86_option_override_internal): Increase
 PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
 * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
 * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
 CPUs with high branch cost.

 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
 index 6337aa5..5ac10eb 100644
 --- a/gcc/config/i386/i386.c
 +++ b/gcc/config/i386/i386.c
 @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
  opts-x_param_values,
  opts_set-x_param_values);

 +  /* Extend full peel max insns parameter for CPUs with high branch cost.  */
 +  if (TARGET_HIGH_BRANCH_COST)
 +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
 +  120,
 +  opts-x_param_values,
 +  opts_set-x_param_values);
 +
 +
/* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
if (opts-x_flag_prefetch_loop_arrays  0
 HAVE_prefetch
 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
 index 2c64162..da0c57b 100644
 --- a/gcc/config/i386/i386.h
 +++ b/gcc/config/i386/i386.h
 @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
  #define TARGET_INTER_UNIT_CONVERSIONS \
 ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
  #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
 +#define TARGET_HIGH_BRANCH_COST
 ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
  #define TARGET_SCHEDULEix86_tune_features[X86_TUNE_SCHEDULE]
  #define TARGET_USE_BT  ix86_tune_features[X86_TUNE_USE_BT]
  #define TARGET_USE_INCDEC  ix86_tune_features[X86_TUNE_USE_INCDEC]
 diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
 index b6b210e..04d8bf8 100644
 --- a/gcc/config/i386/x86-tune.def
 +++ b/gcc/config/i386/x86-tune.def
 @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit,
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
   m_ATHLON_K8 | m_AMDFAM10)

 +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This could 
 be
 +   used to tune unroll, if-cvt, inline... heuristics.  */
 +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost,
 +  m_BONNELL | m_SILVERMONT | m_INTEL)
 +
  
 /*/
  /* Integer instruction selection tuning  
 */
  
 /*/


[PATCH] Fix typo in comment for IRA

2014-10-13 Thread Kito Cheng
Hi all:

This patch contains lots of typo fixes for the IRA module, found with aspell :)

ChangeLog

2014-10-13  Kito Cheng  k...@0xlab.org

* ira.c: Fix typo in comment.
* ira.h: Ditto.
* ira-build.c: Ditto.
* ira-color.c: Ditto.
* ira-emit.c: Ditto.
* ira-int.h: Ditto.
* ira-lives.c: Ditto.
From e7268d1f6e3367a345b2e614a21e596c6ccf621f Mon Sep 17 00:00:00 2001
From: Kito Cheng k...@andestech.com
Date: Fri, 22 Aug 2014 16:27:18 +0800
Subject: [PATCH] Fix typo in comment for IRA

2014-10-13  Kito Cheng  k...@0xlab.org

	* ira.c: Fix typo in comment.
	* ira.h: Ditto.
	* ira-build.c: Ditto.
	* ira-color.c: Ditto.
	* ira-emit.c: Ditto.
	* ira-int.h: Ditto.
	* ira-lives.c: Ditto.
---
 gcc/ira-build.c |  2 +-
 gcc/ira-color.c | 10 +-
 gcc/ira-costs.c |  8 
 gcc/ira-emit.c  |  4 ++--
 gcc/ira-int.h   |  4 ++--
 gcc/ira-lives.c |  2 +-
 gcc/ira.c   | 24 
 gcc/ira.h   |  2 +-
 8 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 9c99166..98df8cd 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -1224,7 +1224,7 @@ ira_create_pref (ira_allocno_t a, int hard_regno, int freq)
   return pref;
 }
 
-/* Attach a pref PREF to the cooresponding allocno.  */
+/* Attach a pref PREF to the corresponding allocno.  */
 static void
 add_allocno_pref_to_list (ira_pref_t pref)
 {
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 841d0d1..a25022f 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -104,7 +104,7 @@ struct update_cost_record
 struct allocno_color_data
 {
   /* TRUE value means that the allocno was not removed yet from the
- conflicting graph during colouring.  */
+ conflicting graph during coloring.  */
   unsigned int in_graph_p : 1;
   /* TRUE if it is put on the stack to make other allocnos
  colorable.  */
@@ -1203,7 +1203,7 @@ struct update_cost_queue_elem
  connecting this allocno to the one being allocated.  */
   int divisor;
 
-  /* Allocno from which we are chaning costs of connected allocnos.
+  /* Allocno from which we are chaining costs of connected allocnos.
  It is used not go back in graph of allocnos connected by
  copies.  */
   ira_allocno_t from;
@@ -1928,7 +1928,7 @@ copy_freq_compare_func (const void *v1p, const void *v2p)
   if (pri2 - pri1)
 return pri2 - pri1;
 
-  /* If freqencies are equal, sort by copies, so that the results of
+  /* If frequencies are equal, sort by copies, so that the results of
  qsort leave nothing to chance.  */
   return cp1-num - cp2-num;
 }
@@ -1983,7 +1983,7 @@ merge_threads (ira_allocno_t t1, ira_allocno_t t2)
   ALLOCNO_COLOR_DATA (t1)-thread_freq += ALLOCNO_COLOR_DATA (t2)-thread_freq;
 }
 
-/* Create threads by processing CP_NUM copies from sorted)ciopeis.  We
+/* Create threads by processing CP_NUM copies from sorted copies.  We
process the most expensive copies first.  */
 static void
 form_threads_from_copies (int cp_num)
@@ -3606,7 +3606,7 @@ conflict_by_live_ranges_p (int regno1, int regno2)
 
   ira_assert (regno1 = FIRST_PSEUDO_REGISTER
 	   regno2 = FIRST_PSEUDO_REGISTER);
-  /* Reg info caclulated by dataflow infrastructure can be different
+  /* Reg info calculated by dataflow infrastructure can be different
  from one calculated by regclass.  */
   if ((a1 = ira_loop_tree_root-regno_allocno_map[regno1]) == NULL
   || (a2 = ira_loop_tree_root-regno_allocno_map[regno2]) == NULL)
diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
index 38d0e0e..5ecbc0b 100644
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -118,7 +118,7 @@ struct cost_classes
   /* Container of the cost classes.  */
   enum reg_class classes[N_REG_CLASSES];
   /* Map reg class - index of the reg class in the previous array.
- -1 if it is not a cost classe.  */
+ -1 if it is not a cost classes.  */
   int index[N_REG_CLASSES];
   /* Map hard regno index of first class in array CLASSES containing
  the hard regno, -1 otherwise.  */
@@ -277,7 +277,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass)
decrease number of cost classes for the pseudo, if hard registers
of some important classes can not hold a value of MODE.  So the
pseudo can not get hard register of some important classes and cost
-   calculation for such important classes is only waisting CPU
+   calculation for such important classes is only wasting CPU
time.  */
 static void
 setup_regno_cost_classes_by_mode (int regno, enum machine_mode mode)
@@ -314,7 +314,7 @@ setup_regno_cost_classes_by_mode (int regno, enum machine_mode mode)
   regno_cost_classes[regno] = classes_ptr;
 }
 
-/* Finilize info about the cost classes for each pseudo.  */
+/* Finalize info about the cost classes for each pseudo.  */
 static void
 finish_regno_cost_classes (void)
 {
@@ -1238,7 +1238,7 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref)
  then we may want to adjust the cost of that register 

Re: [gofrontend-dev] Re: [PATCH 03/13] HACK! Allow the static chain to be set from C

2014-10-13 Thread Richard Biener
On Sat, Oct 11, 2014 at 6:23 AM, Richard Henderson r...@redhat.com wrote:
 On 10/10/2014 06:42 PM, Peter Collingbourne wrote:
 A colleague has suggested a perhaps nicer syntax:

 __builtin_call_chain(pointer, call) where call must be a call expression

 I like this.

 Unlike the other suggestions, it doesn't mess with the parsing of the 
 regular
 part of the function call.  And, depending on what point the builtin is 
 lowered
 and applied to the AST, it might not require any parsing changes at all.

 I'll have a look at this next week.  Thanks.

Does the frontend know that the call expects a static chain?  If so
I like Ian's suggestion more:


How crazy would it be to move __builtin_call_chain into the function
arguments, as in
function(a1, a2, __builtin_call_chain(pointer))
This __builtin_call_chain call would be removed from the argument list
so type checking would only look at a1, a2.  It would just set the
static chain value.  That at least puts the call_chain in the right
place, which is a special kind of function argument.


Richard.


 r~


[Ada] Fix PR ada/63225

2014-10-13 Thread Eric Botcazou
As reported by Alan, the Ada compiler doesn't build anymore if you compile it 
with -fno-inline because cuintp.c references a private function of uintp.adb.

Tested on x86_64-suse-linux, applied on all active branches.


2014-10-13  Eric Botcazou  ebotca...@adacore.com
Alan Modra amo...@gmail.com

PR ada/63225
* uintp.adb (Vector_To_Uint): Move from here to...
* uintp.ads (UI_Vector): Make public.
(Vector_To_Uint): ...here.


-- 
Eric Botcazou
Index: uintp.ads
===
--- uintp.ads	(revision 216116)
+++ uintp.ads	(working copy)
@@ -90,6 +90,18 @@ package Uintp is
Uint_Minus_80  : constant Uint;
Uint_Minus_128 : constant Uint;
 
+   type UI_Vector is array (Pos range ) of Int;
+   --  Vector containing the integer values of a Uint value
+
+   --  Note: An earlier version of this package used pointers of arrays of Ints
+   --  (dynamically allocated) for the Uint type. The change leads to a few
+   --  less natural idioms used throughout this code, but eliminates all uses
+   --  of the heap except for the table package itself. For example, Uint
+   --  parameters are often converted to UI_Vectors for internal manipulation.
+   --  This is done by creating the local UI_Vector using the function N_Digits
+   --  on the Uint to find the size needed for the vector, and then calling
+   --  Init_Operand to copy the values out of the table into the vector.
+
-
-- Subprograms --
-
@@ -252,6 +264,22 @@ package Uintp is
--  function is used for capacity checks, and it can be one bit off
--  without affecting its usage.
 
+   function Vector_To_Uint
+ (In_Vec   : UI_Vector;
+  Negative : Boolean) return Uint;
+   --  Functions that calculate values in UI_Vectors, call this function to
+   --  create and return the Uint value. In_Vec contains the multiple precision
+   --  (Base) representation of a non-negative value. Leading zeroes are
+   --  permitted. Negative is set if the desired result is the negative of the
+   --  given value. The result will be either the appropriate directly
+   --  represented value, or a table entry in the proper canonical format is
+   --  created and returned.
+   --
+   --  Note that Init_Operand puts a signed value in the result vector, but
+   --  Vector_To_Uint is always presented with a non-negative value. The
+   --  processing of signs is something that is done by the caller before
+   --  calling Vector_To_Uint.
+
-
-- Output Routines --
-
@@ -494,18 +522,6 @@ private
--  UI_Vector is defined for this purpose and some internal subprograms
--  used for converting from one to the other are defined.
 
-   type UI_Vector is array (Pos range ) of Int;
-   --  Vector containing the integer values of a Uint value
-
-   --  Note: An earlier version of this package used pointers of arrays of Ints
-   --  (dynamically allocated) for the Uint type. The change leads to a few
-   --  less natural idioms used throughout this code, but eliminates all uses
-   --  of the heap except for the table package itself. For example, Uint
-   --  parameters are often converted to UI_Vectors for internal manipulation.
-   --  This is done by creating the local UI_Vector using the function N_Digits
-   --  on the Uint to find the size needed for the vector, and then calling
-   --  Init_Operand to copy the values out of the table into the vector.
-
type Uint_Entry is record
   Length : Pos;
   --  Length of entry in Udigits table in digits (i.e. in words)
Index: uintp.adb
===
--- uintp.adb	(revision 216116)
+++ uintp.adb	(working copy)
@@ -171,22 +171,6 @@ package body Uintp is
--  If Discard_Quotient is True, Quotient is set to No_Uint
--  If Discard_Remainder is True, Remainder is set to No_Uint
 
-   function Vector_To_Uint
- (In_Vec   : UI_Vector;
-  Negative : Boolean) return Uint;
-   --  Functions that calculate values in UI_Vectors, call this function to
-   --  create and return the Uint value. In_Vec contains the multiple precision
-   --  (Base) representation of a non-negative value. Leading zeroes are
-   --  permitted. Negative is set if the desired result is the negative of the
-   --  given value. The result will be either the appropriate directly
-   --  represented value, or a table entry in the proper canonical format is
-   --  created and returned.
-   --
-   --  Note that Init_Operand puts a signed value in the result vector, but
-   --  Vector_To_Uint is always presented with a non-negative value. The
-   --  processing of signs is something that is done by the caller before
-   --  calling Vector_To_Uint.
-

-- Direct --



Re: [PATCH] PR debug/38757 gcc does not emit DW_LANG_C99

2014-10-13 Thread Mark Wielaard
On Wed, 2014-10-08 at 12:13 +0200, Mark Wielaard wrote:
 This patch was written a long time ago by Jakub and has been in Fedora
 gcc for some time. All I did was rebase it to current gcc trunk
 and add a testcase. Back when it was originally proposed the issue was
 that because DWARF was generated late adding new lang hooks for this
 was problematic for LTO. Now that there is a move towards generating
 DWARF early I am hoping this patch can finally make it to mainline gcc.

Ping.

 This lang hook will be more useful when DWARFv5 gets out, which is
 supposed to define language identifiers for newer C and C++ versions.
 
 Previous discussions:
 http://gcc.gnu.org/ml/gcc-patches/2009-03/msg00858.html
 http://gcc.gnu.org/ml/gcc-patches/2010-04/msg00991.html
 
 gcc/ChangeLog
 
 2009-03-18  Jakub Jelinek  ja...@redhat.com
 
   PR debug/38757
   * langhooks.h (struct lang_hooks): Add source_language langhook.
   * langhooks-def.h (LANG_HOOKS_SOURCE_LANGUAGE): Define to NULL.
   (LANG_HOOKS_INITIALIZER): Add LANG_HOOKS_SOURCE_LANGUAGE.
   * dwarf2out.c (add_prototyped_attribute): Add DW_AT_prototype
   also for DW_LANG_{C,C99,ObjC}.
   (gen_compile_unit_die): Use lang_hooks.source_language () to
   determine if DW_LANG_C99 or DW_LANG_C89 should be returned.
 
 gcc/c/ChangeLog
 
 2009-03-18  Jakub Jelinek  ja...@redhat.com
 
   PR debug/38757
   * c-lang.c (c_source_language): New function.
   (LANG_HOOKS_SOURCE_LANGUAGE): Define.
 
 gcc/testsuite/ChangeLog
 
 2014-10-08  Mark Wielaard  m...@redhat.com
 
   PR debug/38757
   * gcc.dg/debug/dwarf2/lang-c89.c: New test.
   * gcc.dg/debug/dwarf2/lang-c99.c: Likewise.
 ---
  gcc/ChangeLog| 11 +++
  gcc/c/ChangeLog  |  6 ++
  gcc/c/c-lang.c   |  8 
  gcc/dwarf2out.c  | 19 ---
  gcc/langhooks-def.h  |  4 +++-
  gcc/langhooks.h  |  4 
  gcc/testsuite/ChangeLog  |  6 ++
  gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c |  6 ++
  gcc/testsuite/gcc.dg/debug/dwarf2/lang-c99.c |  6 ++
  9 files changed, 66 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c
  create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/lang-c99.c
 
 diff --git a/gcc/c/c-lang.c b/gcc/c/c-lang.c
 index 97c0443..dadeb1a 100644
 --- a/gcc/c/c-lang.c
 +++ b/gcc/c/c-lang.c
 @@ -35,6 +35,12 @@ along with GCC; see the file COPYING3.  If not see
  
  enum c_language_kind c_language = clk_c;
  
 +static int
 +c_source_language (void)
 +{
 +  return flag_isoc99 ? 1999 : 1989;
 +}
 +
  /* Lang hooks common to C and ObjC are declared in c-objc-common.h;
 consequently, there should be very few hooks below.  */
  
 @@ -44,6 +50,8 @@ enum c_language_kind c_language = clk_c;
  #define LANG_HOOKS_INIT c_objc_common_init
  #undef LANG_HOOKS_INIT_TS
  #define LANG_HOOKS_INIT_TS c_common_init_ts
 +#undef LANG_HOOKS_SOURCE_LANGUAGE
 +#define LANG_HOOKS_SOURCE_LANGUAGE c_source_language
  
  /* Each front end provides its own lang hook initializer.  */
  struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER;
 diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
 index 59c05ed..4932cd0 100644
 --- a/gcc/dwarf2out.c
 +++ b/gcc/dwarf2out.c
 @@ -16708,9 +16708,18 @@ add_bit_size_attribute (dw_die_ref die, tree decl)
  static inline void
  add_prototyped_attribute (dw_die_ref die, tree func_type)
  {
 -  if (get_AT_unsigned (comp_unit_die (), DW_AT_language) == DW_LANG_C89
 -   prototype_p (func_type))
 -add_AT_flag (die, DW_AT_prototyped, 1);
 +  switch (get_AT_unsigned (comp_unit_die (), DW_AT_language))
 +{
 +case DW_LANG_C:
 +case DW_LANG_C89:
 +case DW_LANG_C99:
 +case DW_LANG_ObjC:
 +  if (prototype_p (func_type))
 + add_AT_flag (die, DW_AT_prototyped, 1);
 +  break;
 +default:
 +  break;
 +}
  }
  
  /* Add an 'abstract_origin' attribute below a given DIE.  The DIE is found
 @@ -19544,6 +19553,10 @@ gen_compile_unit_die (const char *filename)
   language = DW_LANG_ObjC;
else if (strcmp (language_string, GNU Objective-C++) == 0)
   language = DW_LANG_ObjC_plus_plus;
 +  else if (strcmp (language_string, GNU C) == 0
 + lang_hooks.source_language
 + lang_hooks.source_language () = 1999)
 + language = DW_LANG_C99;
else if (dwarf_version = 5 || !dwarf_strict)
   {
 if (strcmp (language_string, GNU Go) == 0)
 diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h
 index e5ae3e3..b6c8dd4 100644
 --- a/gcc/langhooks-def.h
 +++ b/gcc/langhooks-def.h
 @@ -120,6 +120,7 @@ extern bool lhd_omp_mappable_type (tree);
  #define LANG_HOOKS_BLOCK_MAY_FALLTHRUhook_bool_const_tree_true
  #define LANG_HOOKS_EH_USE_CXA_END_CLEANUPfalse
  #define LANG_HOOKS_DEEP_UNSHARINGfalse
 

Re: [PATCH] cleanups in line-map

2014-10-13 Thread Dodji Seketeli
Manuel López-Ibáñez lopeziba...@gmail.com writes:

 A few cleanups in line-map code. Bootstrapped and regression tested on
 x86_64-linux-gnu.

Thanks for doing this.

 OK?

Yes, barring this little nit:

[...]

 Index: libcpp/line-map.c
 ===
 --- libcpp/line-map.c (revision 216098)
 +++ libcpp/line-map.c (working copy)
 @@ -29,12 +29,10 @@ along with this program; see the file CO
  static void trace_include (const struct line_maps *, const struct line_map 
 *);
  static const struct line_map * linemap_ordinary_map_lookup (struct line_maps 
 *,
   source_location);
  static const struct line_map* linemap_macro_map_lookup (struct line_maps *,
   source_location);
 -static source_location linemap_macro_map_loc_to_def_point
 -(const struct line_map*, source_location);

This is not redundant per se, is it?  It's just a forward declaration of
the function that is defined later.  Just like for
linemap_macro_map_loc_unwind_toward_spelling() below.  Or what am I
missing?  I'd prefer to see this forward declaration stay, FWIW.

Otherwise, this cleanup patch looks good to me.  If it was my call, I'd
say OK with that change.

Thank you for tackling this.

-- 
Dodji


Re: [v3, patch] Move std::list::_M_size closer to iterators

2014-10-13 Thread Jonathan Wakely

On 12/10/14 21:41 +0200, Marc Glisse wrote:

Hello,

this patch moves _M_size in std::list to a place where it should be 
easier (no offsetof) to access from an iterator (it doesn't matter 
when you access it from the main std::list object). It does not take 
advantage of it yet, that can be done at any time, whereas the layout 
will soon be fixed. This triggered one of the issues listed in PR 
63345 (casting to a node* even for the sentinel), which I am fixing 
here.


Nice, OK for trunk, thanks.


Re: [PATCH] cleanups in line-map

2014-10-13 Thread Manuel López-Ibáñez
On 13 October 2014 10:52, Dodji Seketeli do...@redhat.com wrote:
 Manuel López-Ibáñez lopeziba...@gmail.com writes:

 Index: libcpp/line-map.c
 ===
 --- libcpp/line-map.c (revision 216098)
 +++ libcpp/line-map.c (working copy)
 @@ -29,12 +29,10 @@ along with this program; see the file CO
  static void trace_include (const struct line_maps *, const struct line_map 
 *);
  static const struct line_map * linemap_ordinary_map_lookup (struct 
 line_maps *,
   source_location);
  static const struct line_map* linemap_macro_map_lookup (struct line_maps *,
   source_location);
 -static source_location linemap_macro_map_loc_to_def_point
 -(const struct line_map*, source_location);

 This is not redundant per se, is it?  It's just a forward declaration of
 the function that is defined later.  Just like for
 linemap_macro_map_loc_unwind_toward_spelling() below.  Or what am I
 missing?  I'd prefer to see this forward declaration stay, FWIW.

Oh, well, I guess it is a matter of taste. I was annoyed by having to
update two different places (I added const to the first argument of
this definition function, so I will have to also add it here).
Moreover, as the patch shows, the two declarations might be different
(one was static, the other not), and then which one is the correct
one requires some expert knowledge of C++. I understand using forward
declarations when otherwise it would be a mess to re-order the
functions, but in this case, it is not really necessary. But I can
leave it and just update the argument type.

 Otherwise, this cleanup patch looks good to me.  If it was my call, I'd
 say OK with that change.

 Thank you for tackling this.

Thanks for the review.

Cheers,

Manuel.


Re: [PATCH,1/2] Extended if-conversion for loops marked with pragma omp simd.

2014-10-13 Thread Yuri Rumyantsev
Richard,

Here is updated patch (part1) for extended if conversion.

Second part of patch will be sent later.

Changelog.

2014-10-13  Yuri Rumyantsev  ysrum...@gmail.com

* tree-if-conv.c (cgraph.h): Add include file to detect function clone.
(flag_force_vectorize): New variable.
(edge_predicate): New function.
(set_edge_predicate): New function.
(add_to_predicate_list): Check unconditionally that bb is always
executed to early exit. Use predicate of cd-equivalent block
for join blocks if it exists.
(add_to_dst_predicate_list): Invoke add_to_predicate_list if
destination block of edge is not always executed. Set-up predicate
for critical edge.
(if_convertible_phi_p): Accept phi nodes with more than two args
if FLAG_FORCE_VECTORIZE was set-up.
(ifcvt_can_use_mask_load_store): Use FLAG_FORCE_VECTORIZE.
(if_convertible_stmt_p): Fix up pre-function comments.
(all_edges_are_critical): New function.
(if_convertible_bb_p): Allow bb has more than two predecessors if
FLAG_FORCE_VECTORIZE was set-up. Use call of all_edges_are_critical
to reject block if-conversion with incoming critical edges only if
FLAG_FORCE_VECTORIZE was not set-up.
(predicate_bbs): Skip loop exit block also. Add check that if
fold_build2 produces bool conversion, recompute predicate using
build2_loc. Add zeroing of edge 'aux' field under FLAG_FORCE_VECTORIZE.
(if_convertible_loop_p_1): Recompute POST_DOMINATOR tree if
FLAG_FORCE_VECTORIZE was set-up to calculate cd equivalent bb's.
(find_phi_replacement_condition): Extend function interface:
it returns NULL if given phi node must be handled by means of
extended phi node predication. If number of predecessors of phi-block
is equal 2 and at least one incoming edge is not critical original
algorithm is used.
(get_predicate_for_edge): New function.
(find_insertion_point): New function.
(predicate_arbitrary_scalar_phi): New function.
(predicate_all_scalar_phis): Introduce new variable BEFORE.
Invoke find_insertion_point to initialize gsi and
predicate_arbitrary_scalar_phi if TRUE_BB is NULL - it signals
that extended predication must be applied.
(insert_gimplified_predicates): Add test for non-predicated basic
blocks that there are no gimplified statements to insert. Insert
predicates at the block beginning for extended if-conversion.
(tree_if_conversion): Initialize flag_force_vectorize from current
loop or outer loop (to support pragma omp declare). Do loop versioning
for innermost loop marked with pragma omp simd and
FLAG_TREE_LOOP_IF_CONVERT was not set-up. Nullify 'aux' field of edges
for blocks with two successors.




2014-09-22 12:28 GMT+04:00 Yuri Rumyantsev ysrum...@gmail.com:
 Richard,

 here is the reduced patch (part 1), which is now almost half its previous size.
 Let me also answer your comments.

 1. I really use edge field 'aux' to keep predicate for critical edges.
 My previous code was not correct and now it looks like:

   if (EDGE_COUNT (b-succs) == 1 || EDGE_COUNT (e-dest-preds) == 1)
 /* Edge E is not critical,  use predicate of edge source bb. */
 c = bb_predicate (b);
   else
 /* Edge E is critical and its aux field contains predicate.  */
 c = edge_predicate (e);

 2. I completely delete all code related to creation of conditional
 expressions and completely rely on bool pattern recognition in
 vectorizer. But we need to delete all dead predicate computations
 which are not used since they prevent vectorization. I will add this
 local-dce function in next patch.
 3. I also did not include in this patch recognition of general
 phi-nodes with two arguments only for which conversion of conditional
 scalar reduction can be applied also.
 Note that all these changes are applied for loop marked with pragma
 omp simd only.

 2014-09-22  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-if-conv.c (cgraph.h): Add include file to detect function clone.
 (flag_force_vectorize): New variable.
 (edge_predicate): New function.
 (set_edge_predicate): New function.
 (convert_name_to_cmp): New function.
 (add_to_predicate_list): Check unconditionally that bb is always
 executed to early exit. Use predicate of cd-equivalent block
 for join blocks if it exists.
 (add_to_dst_predicate_list): Invoke add_to_predicate_list if
 destination block of edge is not always executed. Set-up predicate
 for critical edge.
 (if_convertible_phi_p): Accept phi nodes with more than two args
 if FLAG_FORCE_VECTORIZE was set-up.
 (ifcvt_can_use_mask_load_store): Use FLAG_FORCE_VECTORIZE.
 (if_convertible_stmt_p): Fix up pre-function comments.
 (all_edges_are_critical): New function.
 (if_convertible_bb_p): Allow bb has more than two predecessors if
 FLAG_FORCE_VECTORIZE was set-up. Use call of all_edges_are_critical
 to reject block if-conversion with incoming critical edges only if
 FLAG_FORCE_VECTORIZE was not set-up.
 (predicate_bbs): Skip loop exit block also. Add check that if
 fold_build2 produces bool conversion, recompute predicate using
 build2_loc. Add zeroing of edge 'aux' field under 

[PATCH, 2/2] Extended if-conversion for loops marked with pragma omp simd.

2014-10-13 Thread Yuri Rumyantsev
Richard,

Here is second part of patch which includes
1. One particular phi node recognition  - if phi function has more
than 2 arguments but it has only two really different arguments and
one argument has the only occurrence. This is important for
conditional scalar reduction conversion, e.g. for such test-case;
 if (a[i] != 0 && b[i] != 0)  n++;
2. New sub-pass which repairs bool pattern candidates with multiple
uses - such a situation occurs if the same predicate (not simple
compare) is used for phi node conversion and load/store mask. If for
some var participating in tree traversal its def stmt has multiple
uses we create a copy of this definition with unique left hand side and
change one use of the original var to the newly created one. We repeat
this process until all multiple uses have been eliminated.
3. Another sub-pass which deletes redundant predicate computations
which are dead locally, i.e. local dead code elimination. Note that
such dead code can prevent loop vectorization.

Changelog:

2014-10-13  Yuri Rumyantsev  ysrum...@gmail.com

* tree-if-conv.c (cgraph.h): Add include file to issue error message.
(phi_has_two_different_args): New function.
(is_cond_scalar_reduction): Add argument EXTENDED to choose access
to phi arguments. Invoke phi_has_two_different_args to get phi
arguments iff EXTENDED is true. Change check stmt-block is predecessor
of phi-block since phi may have more than two arguments.
(convert_scalar_cond_reduction): Add argument BEFORE to insert
statement before/after gsi point.
(predicate_scalar_phi): Add argument false to call of
is_cond_scalar_reduction. Add argument true to call of
convert_scalar_cond_reduction.
(predicate_arbitrary_scalar_phi): Change result of function to tree
representing rhs of new phi replacement stmt.
(predicate_extended_scalar_phi): New function.
(predicate_all_scalar_phis): Invoke predicate_extended_scalar_phi
instead of predicate_arbitrary_scalar_phi.
(ifcvt_split_def_stmt): New function.
(ifcvt_walk_pattern_tree): New function.
(stmt_is_root_of_bool_pattern): New function.
(ifcvt_repair_bool_pattern): New function.
(ifcvt_local_dce): New function.
(tree_if_conversion): Invoke ifcvt_local_dce and
ifcvt_repair_bool_pattern under FLAG_FORCE_VECTORIZE.


patch.part-2
Description: Binary data


[AArch64] [BE] [2/2] Make large opaque integer modes endianness-safe.

2014-10-13 Thread David Sherwood
Hi,

This is the second patch of the work to fix:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59810

and removes the CANNOT_CHANGE_MODE_CLASS macro, which now permits subregs of
vector registers to work correctly on aarch64_be.

NOTE: This patch depends upon the following:
[AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe.
[AArch64] [BE] Fix vector load/stores to not use ld1/st1

Thanks,
David.

ChangeLog:

gcc/:
2014-10-13  David Sherwood  david.sherw...@arm.com

* config/aarch64/aarch64.h (CLEAR_INSN_CACHE): Removed.
* config/aarch64/aarch64.c (aarch64_cannot_change_mode_class): Removed.
* config/aarch64/aarch64-protos.h (aarch64_cannot_change_mode_class):
Removed.


ccmc_v2.patch
Description: Binary data


Re: [PATCH 1/2] libstdc++: Add std::align.

2014-10-13 Thread Jonathan Wakely

On 16/04/14 17:06 +0200, Rüdiger Sonderfeld wrote:

C++11 [ptr.align].

This should probably not be inline.  But for now this avoids any ABI
changes.

* libstdc++-v3/testsuite/20_util/align/1.cc: New file.
* libstdc++-v3/include/std/memory (align): New function.


Fixed (the new function should be in namespace std!) and applied to
trunk. Thanks for the patch.

Tested x86_64-linux.
commit d49cd05620599277845f89325ac8a43622650e8f
Author: Jonathan Wakely jwak...@redhat.com
Date:   Mon Oct 13 10:50:16 2014 +0100

2014-10-13  Rüdiger Sonderfeld  ruedi...@c-plusplus.de

	* include/std/memory (align): Define.
	* testsuite/20_util/align/1.cc: New.

diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index dafec0c..affc8b1 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -87,4 +87,46 @@
 #  include backward/auto_ptr.h
 #endif
 
+#if __cplusplus = 201103L
+#  include cstdint
+#  ifdef _GLIBCXX_USE_C99_STDINT_TR1
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+/**
+ *  @brief Fit aligned storage in buffer.
+ *
+ *  [ptr.align]
+ *
+ *  This function tries to fit @a __size bytes of storage with alignment
+ *  @a __align into the buffer @a __ptr of size @a __space bytes.  If such
+ *  a buffer fits then @a __ptr is changed to point to the first byte of the
+ *  aligned storage and @a __space is reduced by the bytes used for alignment.
+ *
+ *  @param __align   A fundamental or extended alignment value.
+ *  @param __size    Size of the aligned storage required.
+ *  @param __ptr     Pointer to a buffer of @a __space bytes.
+ *  @param __space   Size of the buffer pointed to by @a __ptr.
+ *  @return the updated pointer if the aligned storage fits, otherwise nullptr.
+ */
+inline void*
+align(size_t __align, size_t __size, void* __ptr, size_t __space) noexcept
+{
+  const size_t __diff = __align - reinterpret_castuintptr_t(__ptr) % __align;
+  if (__diff + __size = __space)
+return nullptr;
+  else
+{
+  __space -= __diff;
+  __ptr = static_castchar*(__ptr) + __diff;
+  return __ptr;
+}
+}
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace
+#endif // _GLIBCXX_USE_C99_STDINT_TR1
+#endif // C++11
+
 #endif /* _GLIBCXX_MEMORY */
diff --git a/libstdc++-v3/testsuite/20_util/align/1.cc b/libstdc++-v3/testsuite/20_util/align/1.cc
new file mode 100644
index 000..d1f94e9
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/align/1.cc
@@ -0,0 +1,61 @@
+// { dg-options  -std=gnu++11  }
+
+// 2014-04-16 Rüdiger Sonderfeld  ruedi...@c-plusplus.de
+
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+// C++11 [ptr.align] (20.6.5): std::align
+
+#include memory
+#include cstdint
+#include testsuite_hooks.h
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  size_t space = 100;
+  void* ptr = new char[space];
+  char* const orig_ptr = static_castchar*(ptr);
+  char* old_ptr = orig_ptr;
+  const size_t orig_space = space;
+  size_t old_space = space;
+  const size_t alignment = 16;
+  const size_t size = 10;
+  while( void* const r = std::align(alignment, size, ptr, space) )
+{
+  VERIFY( r == ptr );
+  uintptr_t p = reinterpret_castuintptr_t(ptr);
+  VERIFY( p % alignment == 0 );
+  char* const x = static_castchar*(ptr);
+  VERIFY( x - old_ptr == old_space - space );
+  VERIFY( (void*)x  (void*)(orig_ptr + orig_space) );
+  VERIFY( (void*)(x + size)  (void*)(orig_ptr + orig_space) );
+  ptr = x + size;
+  old_ptr = x;
+  old_space = space;
+  space -= size;
+}
+  delete [] orig_ptr;
+}
+
+int main()
+{
+  test01();
+}


Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2014-10-13 Thread Jakub Jelinek
On Sat, Oct 11, 2014 at 06:49:00PM +0400, Ilya Verbin wrote:
 This is the last common infrastructure patch in the series.
 (Next patches will contain tests for libgomp testsuite and MIC specific 
 things)
 
 It introduces 2 new options:
 1. -foffload=targets=options
By default, GCC will build offload images for all offload targets specified
 in configure, with non-target-specific options passed to host compiler.
 This option is used to control offload targets and options for them.
 
 It can be used in a few ways:
 * -foffload=disable
   Tells GCC to disable offload support.
   OpenMP target regions will be run in host fallback mode.
 * -foffload=targets
   Tells GCC to build offload images for targets.
   They will be built with non-target-specific options passed to host compiler.
 * -foffload=options
   Tells GCC to build offload images for all targets specified in configure. 
   They will be built with non-target-specific options passed to host compiler
   plus options.
 * -foffload=targets=options
   Tells GCC to build offload images for targets.
   They will be built with non-target-specific options passed to host compiler
   plus options.
 
 Options specified by -foffload are appended to the end of option set, so in 
 case
 of option conflicts they have more priority.

This looks good to me.

 2. -foffload-abi=[lp64|ilp32]
This option is supposed to tell mkoffload (and offload compiler) which ABI 
 is
 used in streamed GIMPLE.  This option is desirable, because host and offload
 compilers must have the same ABI.  The option is generated by the host 
 compiler
 automatically, it should not be specified by user.

But I'd like to understand why is this one needed.
Why should the compilers care?  Aggregates layout and alignment of
integral/floating types must match between host and offload compilers, sure,
but isn't that something streamed already in the LTO bytecode?
Or is LTO streamer not streaming some types like long_type_node?
I'd expect if host and offload compiler disagree on long type size that
you'd just use a different integral type with the same size as long on the
host.
Different sized pointers are of course a bigger problem, but can't you just
error out on that during reading of the LTO, or even handle it (just use
some integral type for when is the pointer stored in memory, and just
convert to pointer after reads from memory, and convert back before storing
to memory).  Erroring out during LTO streaming in sounds just fine to me
though.

Jakub


[PATCHv5][PING] Vimrc config with GNU formatting

2014-10-13 Thread Yury Gribov

On 10/02/2014 09:14 PM, Yury Gribov wrote:

On 09/17/2014 09:08 PM, Yury Gribov wrote:
  On 09/16/2014 08:38 PM, Yury Gribov wrote:
  Hi all,
 
  This is the third version of the patch. A list of changes since last
  version:
  * move config to contrib so that it's _not_ enabled by default (current
  score is 2/1 in favor of no Vim config by default)
  * update Makefile.in to make .local.vimrc if developer asks for it
  * disable autoformatting for flex files
  * fix filtering of non-GNU sources (libsanitizer)
  * added some small fixes in cinoptions based on feedback from community
 
  As noted by Richard, the config does not do a good job of formatting
  unbound {} blocks e.g.
  void
  foo ()
  {
 int x;
   {
 // I'm an example of bad bad formatting
   }
  }
  but it seems to be the best we can get with Vim's cindent
  (and I don't think anyone seriously considers writing a custom
  indentexpr).
 
  Ok to commit?
 
  New version with support for another popular local .vimrc plugin.

Hi all,

Here is a new version of the vimrc patch. Hope I got email settings right
this time.

Changes since v4:
* fixed and enhanced docs
* added support for .lvimrc in Makefile
* minor fixes in cinoptions and formatoptions (reported by Segher)
* removed shiftwidth settings (as it does not really relate to code
formatting)

-Y



commit 3f560e9dd16a5e914b6f2ba82edffe13dfde944c
Author: Yury Gribov y.gri...@samsung.com
Date:   Thu Oct 2 15:50:52 2014 +0400

2014-10-02  Laurynas Biveinis  laurynas.bivei...@gmail.com
	Yury Gribov  y.gri...@samsung.com

Vim config with GNU formatting.

contrib/
	* vimrc: New file.

/
	* .gitignore: Added .local.vimrc and .lvimrc.
	* Makefile.tpl (vimrc, .lvimrc, .local.vimrc): New targets.
	* Makefile.in: Regenerate.

diff --git a/.gitignore b/.gitignore
index e9b56be..ab97ac6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,9 @@ POTFILES
 TAGS
 TAGS.sub
 
+.local.vimrc
+.lvimrc
+
 .gdbinit
 .gdb_history
 
diff --git a/Makefile.in b/Makefile.in
index d6105b3..f3a34af 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -2384,6 +2384,18 @@ mail-report-with-warnings.log: warning.log
 	chmod +x $@
 	echo If you really want to send e-mail, run ./$@ now
 
+# Local Vim config
+
+$(srcdir)/.local.vimrc:
+	$(LN_S) $(srcdir)/contrib/vimrc $@
+
+$(srcdir)/.lvimrc:
+	$(LN_S) $(srcdir)/contrib/vimrc $@
+
+vimrc: $(srcdir)/.local.vimrc $(srcdir)/.lvimrc
+
+.PHONY: vimrc
+
 # Installation targets.
 
 .PHONY: install uninstall
diff --git a/Makefile.tpl b/Makefile.tpl
index f7c7e38..b98930c 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -867,6 +867,18 @@ mail-report-with-warnings.log: warning.log
 	chmod +x $@
 	echo If you really want to send e-mail, run ./$@ now
 
+# Local Vim config
+
+$(srcdir)/.local.vimrc:
+	$(LN_S) $(srcdir)/contrib/vimrc $@
+
+$(srcdir)/.lvimrc:
+	$(LN_S) $(srcdir)/contrib/vimrc $@
+
+vimrc: $(srcdir)/.local.vimrc $(srcdir)/.lvimrc
+
+.PHONY: vimrc
+
 # Installation targets.
 
 .PHONY: install uninstall
diff --git a/contrib/vimrc b/contrib/vimrc
new file mode 100644
index 000..34e8f35
--- /dev/null
+++ b/contrib/vimrc
@@ -0,0 +1,45 @@
+ Code formatting settings for Vim.
+
+ To enable this for GCC files by default, you can either source this file
+ in your .vimrc via autocmd:
+   :au BufNewFile,BufReadPost path/to/gcc/* :so path/to/gcc/contrib/vimrc
+ or source the script manually for each newly opened file:
+   :so contrib/vimrc
+ You could also use numerous plugins that enable local vimrc e.g.
+ mbr's localvimrc or thinca's vim-localrc (but note that the latter
+ is much less secure). To install local vimrc config, run
+   $ make vimrc
+ from GCC build folder.
+ 
+ Copyright (C) 2014 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see http://www.gnu.org/licenses/.
+
+function! SetStyle()
+  let l:fname = expand(%:p)
+  if stridx(l:fname, 'libsanitizer') != -1
+return
+  endif
+  let l:ext = fnamemodify(l:fname, :e)
+  let l:c_exts = ['c', 'h', 'cpp', 'cc', 'C', 'H', 'def', 'java']
+  if index(l:c_exts, l:ext) != -1
+setlocal cindent
+setlocal softtabstop=2
+setlocal cinoptions=4,n-2,{2,^-2,:2,=2,g0,f0,h2,p4,t0,+2,(0,u0,w1,m0
+setlocal textwidth=80
+setlocal formatoptions-=ro formatoptions+=cqlt
+  endif
+endfunction
+
+call SetStyle()


Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2014-10-13 Thread Ilya Verbin
On 13 Oct 12:19, Jakub Jelinek wrote:
 But I'd like to understand why is this one needed.
 Why should the compilers care?  Aggregates layout and alignment of
 integral/floating types must match between host and offload compilers, sure,
 but isn't that something streamed already in the LTO bytecode?
 Or is LTO streamer not streaming some types like long_type_node?
 I'd expect if host and offload compiler disagree on long type size that
 you'd just use a different integral type with the same size as long on the
 host.
 Different sized pointers are of course a bigger problem, but can't you just
 error out on that during reading of the LTO, or even handle it (just use
 some integral type for when is the pointer stored in memory, and just
 convert to pointer after reads from memory, and convert back before storing
 to memory).  Erroring out during LTO streaming in sounds just fine to me
 though.

Actually this option was developed by Bernd, so I think PTX team is going to use
it somehow.  In MIC's case we're planning just to check in mkoffload that host
and target compiler's ABI are the same.  Without this check we will crash in LTO
streamer with ICE, so I'd like to issue an error message, rather than crashing.

  -- Ilya


Re: Fix libgomp crash without TLS (PR42616)

2014-10-13 Thread Varvara Rainchik
 Now, I wonder on which OS and why does config/tls.m4 CHECK_GCC_TLS
 actually fail?  Can you figure that out?


On Android check passes with --disable-tls (standard while building
gcc for Android as TLS is not supported in bionic) and fails with
--enable-tls (i686-linux-android/libgomp/conftest.c:32: undefined
reference to `___tls_get_addr'). So, HAVE_TLS is not defined in both
cases.

 If we get rid of HAVE_TLS code altogether, we might lose support of
 some very old OSes, e.g. some Linux distros with a recent gcc and binutils
 (so that emutls isn't used), but very old glibc (that doesn't support
 TLS or supports it incorrectly, think of pre-2002 glibc).  So, if we get
 rid of !HAVE_TLS code in libgomp, it would be nice if config/tls.m4 detected
 it properly and we'd just fail at configure time.

How can we check this in config/tls.m4? Can we just combine tests on
TLS and emutls? E.g. check whether HAVE_TLS and USE_EMUTLS are both
defined.

 And if we don't, just make sure that on Android, Darwin and/or M$Win (or
 whatever other OS you had in mind which does support pthreads, but doesn't
 support native TLS) find out why HAVE_AS_TLS is not defined (guess
 config.log should explain that).

HAVE_AS_TLS is also not defined for Android as it depends on --enable-tls.


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [6/n] Instrument calls and returns

2014-10-13 Thread Ilya Enkovich
On 10 Oct 12:50, Jeff Law wrote:
 On 10/08/14 13:04, Ilya Enkovich wrote:
 Hi,
 
 This patch adds instrumentation of calls and returns into instrumentation 
 pass.
 
 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
 
  * tree-chkp.c (chkp_add_bounds_to_ret_stmt): New.
  (chkp_replace_address_check_builtin): New.
  (chkp_replace_extract_builtin): New.
  (chkp_find_bounds_for_elem): New.
  (chkp_add_bounds_to_call_stmt): New.
  (chkp_instrument_function): Instrument rets and calls.
 
 
 [ snip ]
 
 +  /* Additionall we need to add bounds
 s/Additionall/Additionally/
 
 OK with that nit fixed.
 
 jeff

Here is a fixed version.

Thanks,
Ilya
--
diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 12f8f4a..0d4577d1 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -1047,6 +1047,29 @@ chkp_get_registered_bounds (tree ptr)
   return slot ? *slot : NULL_TREE;
 }
 
+/* Add bound retvals to return statement pointed by GSI.  */
+
+static void
+chkp_add_bounds_to_ret_stmt (gimple_stmt_iterator *gsi)
+{
+  gimple ret = gsi_stmt (*gsi);
+  tree retval = gimple_return_retval (ret);
+  tree ret_decl = DECL_RESULT (cfun-decl);
+  tree bounds;
+
+  if (!retval)
+return;
+
+  if (BOUNDED_P (ret_decl))
+{
+  bounds = chkp_find_bounds (retval, gsi);
+  bounds = chkp_maybe_copy_and_register_bounds (ret_decl, bounds);
+  gimple_return_set_retbnd (ret, bounds);
+}
+
+  update_stmt (ret);
+}
+
 /* Force OP to be suitable for using as an argument for call.
New statements (if any) go to SEQ.  */
 static tree
@@ -1169,6 +1192,64 @@ chkp_check_mem_access (tree first, tree last, tree 
bounds,
   chkp_check_upper (last, bounds, iter, location, dirflag);
 }
 
+/* Replace call to _bnd_chk_* pointed by GSI with
+   bndcu and bndcl calls.  DIRFLAG determines whether
+   check is for read or write.  */
+
+void
+chkp_replace_address_check_builtin (gimple_stmt_iterator *gsi,
+   tree dirflag)
+{
+  gimple_stmt_iterator call_iter = *gsi;
+  gimple call = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (call);
+  tree addr = gimple_call_arg (call, 0);
+  tree bounds = chkp_find_bounds (addr, gsi);
+
+  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_LBOUNDS
+  || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_BOUNDS)
+chkp_check_lower (addr, bounds, *gsi, gimple_location (call), dirflag);
+
+  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_UBOUNDS)
+chkp_check_upper (addr, bounds, *gsi, gimple_location (call), dirflag);
+
+  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_BOUNDS)
+{
+  tree size = gimple_call_arg (call, 1);
+  addr = fold_build_pointer_plus (addr, size);
+  addr = fold_build_pointer_plus_hwi (addr, -1);
+  chkp_check_upper (addr, bounds, *gsi, gimple_location (call), dirflag);
+}
+
+  gsi_remove (call_iter, true);
+}
+
+/* Replace call to _bnd_get_ptr_* pointed by GSI with
+   corresponding bounds extract call.  */
+
+void
+chkp_replace_extract_builtin (gimple_stmt_iterator *gsi)
+{
+  gimple call = gsi_stmt (*gsi);
+  tree fndecl = gimple_call_fndecl (call);
+  tree addr = gimple_call_arg (call, 0);
+  tree bounds = chkp_find_bounds (addr, gsi);
+  gimple extract;
+
+  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_GET_PTR_LBOUND)
+fndecl = chkp_extract_lower_fndecl;
+  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_GET_PTR_UBOUND)
+fndecl = chkp_extract_upper_fndecl;
+  else
+gcc_unreachable ();
+
+  extract = gimple_build_call (fndecl, 1, bounds);
+  gimple_call_set_lhs (extract, gimple_call_lhs (call));
+  chkp_mark_stmt (extract);
+
+  gsi_replace (gsi, extract, false);
+}
+
 /* Return COMPONENT_REF accessing FIELD in OBJ.  */
 static tree
 chkp_build_component_ref (tree obj, tree field)
@@ -1247,6 +1328,82 @@ chkp_can_be_shared (tree t)
   return false;
 }
 
+/* Helper function for chkp_add_bounds_to_call_stmt.
+   Fill ALL_BOUNDS output array with created bounds.
+
+   OFFS is used for recursive calls and holds basic
+   offset of TYPE in outer structure in bits.
+
+   ITER points a position where bounds are searched.
+
+   ALL_BOUNDS[i] is filled with elem bounds if there
+   is a field in TYPE which has pointer type and offset
+   equal to i * POINTER_SIZE in bits.  */
+static void
+chkp_find_bounds_for_elem (tree elem, tree *all_bounds,
+  HOST_WIDE_INT offs,
+  gimple_stmt_iterator *iter)
+{
+  tree type = TREE_TYPE (elem);
+
+  if (BOUNDED_TYPE_P (type))
+{
+  if (!all_bounds[offs / POINTER_SIZE])
+   {
+ tree temp = make_temp_ssa_name (type, gimple_build_nop (), );
+ gimple assign = gimple_build_assign (temp, elem);
+ gimple_stmt_iterator gsi;
+
+ gsi_insert_before (iter, assign, GSI_SAME_STMT);
+ gsi = gsi_for_stmt (assign);
+
+ all_bounds[offs / POINTER_SIZE] = chkp_find_bounds (temp, 

Re: [PATCH, Pointer Bounds Checker 14/x] Passes [7/n] Instrument params

2014-10-13 Thread Ilya Enkovich
On 10 Oct 12:10, Jeff Law wrote:
 On 10/08/14 13:06, Ilya Enkovich wrote:
 Hi,
 
 This patch adds bounds initialization for address taken input arguments.
 
 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
 
  * tree-chkp.c (chkp_instrument_function): Store bounds for
  address taken args.
 
 
 diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
 index 6bbceb0..5443950 100644
 --- a/gcc/tree-chkp.c
 +++ b/gcc/tree-chkp.c
 @@ -3758,6 +3758,51 @@ chkp_instrument_function (void)
 bb = next;
   }
 while (bb);
 +
 +  /* Some input params may have bounds and be address taken.  In this case
 + we should store incomping bounds into bounds table.  */
 s/incomping/incoming/
 
 With that nit fixed this is OK.
 
 jeff
 

Here is a fixed version.

Thanks,
Ilya
--
diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 0d4577d1..53b3386 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -3758,6 +3758,51 @@ chkp_instrument_function (void)
   bb = next;
 }
   while (bb);
+
+  /* Some input params may have bounds and be address taken.  In this case
+ we should store incoming bounds into bounds table.  */
+  tree arg;
+  if (flag_chkp_store_bounds)
+for (arg = DECL_ARGUMENTS (cfun-decl); arg; arg = DECL_CHAIN (arg))
+  if (TREE_ADDRESSABLE (arg))
+   {
+ if (BOUNDED_P (arg))
+   {
+ tree bounds = chkp_get_next_bounds_parm (arg);
+ tree def_ptr = ssa_default_def (cfun, arg);
+ gimple_stmt_iterator iter
+   = gsi_start_bb (chkp_get_entry_block ());
+ chkp_build_bndstx (chkp_build_addr_expr (arg),
+def_ptr ? def_ptr : arg,
+bounds, iter);
+
+ /* Skip bounds arg.  */
+ arg = TREE_CHAIN (arg);
+   }
+ else if (chkp_type_has_pointer (TREE_TYPE (arg)))
+   {
+ tree orig_arg = arg;
+ bitmap slots = chkp_find_bound_slots (TREE_TYPE (arg));
+ gimple_stmt_iterator iter
+   = gsi_start_bb (chkp_get_entry_block ());
+ bitmap_iterator bi;
+ unsigned bnd_no;
+
+ EXECUTE_IF_SET_IN_BITMAP (slots, 0, bnd_no, bi)
+   {
+ tree bounds = chkp_get_next_bounds_parm (arg);
+ HOST_WIDE_INT offs = bnd_no * POINTER_SIZE / BITS_PER_UNIT;
+ tree addr = chkp_build_addr_expr (orig_arg);
+ tree ptr = build2 (MEM_REF, ptr_type_node, addr,
+build_int_cst (ptr_type_node, offs));
+ chkp_build_bndstx (chkp_build_addr_expr (ptr), ptr,
+bounds, iter);
+
+ arg = DECL_CHAIN (arg);
+   }
+ BITMAP_FREE (slots);
+   }
+   }
 }
 
 /* Initialize pass.  */


Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly

2014-10-13 Thread Evgeny Stupachenko
I need to collect data from Haswell, but the patch should not help
its performance much, just increase code size.

On Mon, Oct 13, 2014 at 12:01 PM, Richard Biener
richard.guent...@gmail.com wrote:
 On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:
 Hi,

 The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
 high branch cost.
 Bootstrap and make check are in progress.
 The patch boosts (up to 2,5 times improve) several benchmarks compiled
 with -Ofast on Silvermont
 Spec2000:
 +5% gain on 173.applu
 +1% gain on 255.vortex

 Is it ok for trunk when pass bootstrap and make check?

 This is only a 20% increase - from 100 to 120.  I would instead suggest
 to explore doing this change unconditionally if it helps that much.

 Richard.

 Thanks,
 Evgeny

 2014-10-10  Evgeny Stupachenko  evstu...@gmail.com
 * config/i386/i386.c (ix86_option_override_internal): Increase
 PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
 * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
 * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
 CPUs with high branch cost.

 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
 index 6337aa5..5ac10eb 100644
 --- a/gcc/config/i386/i386.c
 +++ b/gcc/config/i386/i386.c
 @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
  opts-x_param_values,
  opts_set-x_param_values);

 +  /* Extend full peel max insns parameter for CPUs with high branch cost.  
 */
 +  if (TARGET_HIGH_BRANCH_COST)
 +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
 +  120,
 +  opts-x_param_values,
 +  opts_set-x_param_values);
 +
 +
/* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
if (opts-x_flag_prefetch_loop_arrays  0
 HAVE_prefetch
 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
 index 2c64162..da0c57b 100644
 --- a/gcc/config/i386/i386.h
 +++ b/gcc/config/i386/i386.h
 @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
  #define TARGET_INTER_UNIT_CONVERSIONS \
 ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
  #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
 +#define TARGET_HIGH_BRANCH_COST
 ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
  #define TARGET_SCHEDULEix86_tune_features[X86_TUNE_SCHEDULE]
  #define TARGET_USE_BT  ix86_tune_features[X86_TUNE_USE_BT]
  #define TARGET_USE_INCDEC  ix86_tune_features[X86_TUNE_USE_INCDEC]
 diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
 index b6b210e..04d8bf8 100644
 --- a/gcc/config/i386/x86-tune.def
 +++ b/gcc/config/i386/x86-tune.def
 @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit,
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
   m_ATHLON_K8 | m_AMDFAM10)

 +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This 
 could be
 +   used to tune unroll, if-cvt, inline... heuristics.  */
 +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost,
 +  m_BONNELL | m_SILVERMONT | m_INTEL)
 +
  
 /*/
  /* Integer instruction selection tuning 
  */
  
 /*/


Re: [PATCH] cleanup in c-parser

2014-10-13 Thread Anthony Brandon
I updated the patch with a comment. Actually, Manuel handed me this
patch just to help me get familiar with the process of submitting and
testing.
Generating this one with git diff looks different so I'm not sure if
that's a problem or not.

Thanks,
Anthony

On Sun, Oct 12, 2014 at 10:09 PM,  pins...@gmail.com wrote:




 On Oct 12, 2014, at 12:37 PM, Anthony Brandon anthony.bran...@gmail.com 
 wrote:

 Hi,

 I'm a new contributor and I don't yet have a copyright assignment or
 commit access.


 Thanks for your contribution.  Your new function is missing a comment before 
 it saying what it does. Yes it might be obvious what the function does but 
 the coding style requires it.

 Thanks,
 Andrew


 This is a cleanup of code duplication in c-parser.
 I bootstrapped and tested on x86_64-linux.


 gcc/c/ChangeLog:

 2014-10-12  Anthony Brandon  anthony.bran...@gmail.com

* c-parser.c (c_parser_all_labels): New function to replace
 the duplicate code.
(c_parser_statement): Call the new function.
 cleanup.diff



-- 
Anthony
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 0d159fd..346448a 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -4654,6 +4654,18 @@ c_parser_compound_statement_nostart (c_parser *parser)
   mark_valid_location_for_stdc_pragma (save_valid_for_pragma);
 }
 
+/* Parse all consecutive labels. */
+
+static void
+c_parser_all_labels (c_parser *parser)
+{
+  while (c_parser_next_token_is_keyword (parser, RID_CASE)
+|| c_parser_next_token_is_keyword (parser, RID_DEFAULT)
+|| (c_parser_next_token_is (parser, CPP_NAME)
+ c_parser_peek_2nd_token (parser)-type == CPP_COLON))
+c_parser_label (parser);
+}
+
 /* Parse a label (C90 6.6.1, C99 6.8.1).
 
label:
@@ -4854,11 +4866,7 @@ c_parser_label (c_parser *parser)
 static void
 c_parser_statement (c_parser *parser)
 {
-  while (c_parser_next_token_is_keyword (parser, RID_CASE)
-|| c_parser_next_token_is_keyword (parser, RID_DEFAULT)
-|| (c_parser_next_token_is (parser, CPP_NAME)
- c_parser_peek_2nd_token (parser)-type == CPP_COLON))
-c_parser_label (parser);
+  c_parser_all_labels (parser);
   c_parser_statement_after_labels (parser);
 }
 
@@ -5090,11 +5098,7 @@ c_parser_if_body (c_parser *parser, bool *if_p)
 {
   tree block = c_begin_compound_stmt (flag_isoc99);
   location_t body_loc = c_parser_peek_token (parser)-location;
-  while (c_parser_next_token_is_keyword (parser, RID_CASE)
-|| c_parser_next_token_is_keyword (parser, RID_DEFAULT)
-|| (c_parser_next_token_is (parser, CPP_NAME)
- c_parser_peek_2nd_token (parser)-type == CPP_COLON))
-c_parser_label (parser);
+  c_parser_all_labels (parser);
   *if_p = c_parser_next_token_is_keyword (parser, RID_IF);
   if (c_parser_next_token_is (parser, CPP_SEMICOLON))
 {
@@ -5121,11 +5125,7 @@ c_parser_else_body (c_parser *parser)
 {
   location_t else_loc = c_parser_peek_token (parser)-location;
   tree block = c_begin_compound_stmt (flag_isoc99);
-  while (c_parser_next_token_is_keyword (parser, RID_CASE)
-|| c_parser_next_token_is_keyword (parser, RID_DEFAULT)
-|| (c_parser_next_token_is (parser, CPP_NAME)
- c_parser_peek_2nd_token (parser)-type == CPP_COLON))
-c_parser_label (parser);
+  c_parser_all_labels (parser);
   if (c_parser_next_token_is (parser, CPP_SEMICOLON))
 {
   location_t loc = c_parser_peek_token (parser)-location;


Re: [PATCH 1/2] libstdc++: Add std::align.

2014-10-13 Thread Jonathan Wakely

On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote:

On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote:

Fixed (the new function should be in namespace std!) and applied to
trunk. Thanks for the patch.


Thanks for merging and improving my patches!


Sorry it's taken so long, I needed to get some of the ABI transition
work done first.


If you have the time maybe you could also look at the put_time patch.  Unlike
get_time it does not require an ABI change.


OK great.

I also revisited your codecvt proposal, but I don't think you ever
sent a patch, only the RFC about the design. Do you have a finished
patch?



Re: [PATCH, Pointer Bounds Checker 14/x] Passes [10/n] Stores handler

2014-10-13 Thread Ilya Enkovich
2014-10-09 22:51 GMT+04:00 Jeff Law l...@redhat.com:
 On 10/08/14 13:12, Ilya Enkovich wrote:

 Hi,

 This patch adds an assignment processing function which is used by inliner
 for newly generated stores.

 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com

 * tree-chkp.c (chkp_copy_bounds_for_assign): New.
 * tree-chkp.h (chkp_copy_bounds_for_assign): New.

 This probably should have been part of the inliner submission since that's
 the only place it's used and one needs the inliner context to know how this
 function is going to be used.

 Presumably the reason it's not in tree-inline and static is you want to
 utilize chkp_walk_pointer_assignments?

 The code is fine, just want to make sure it goes into a logical place.

 Jeff



I have to export either chkp_copy_bounds_for_assign or
chkp_walk_pointer_assignments with chkp_copy_bounds_for_elem.  Not much
difference but I'd prefer to keep all memrefs processing codes in
tree-chkp.c.

Ilya


Re: [libstdc++] Add xmethods for array, deque, forward_list, list and vector.

2014-10-13 Thread Jonathan Wakely

On 12/10/14 06:49 -0700, Siva Chandra wrote:

Hello,

Attached is a patch which adds xmethods for std::array, std::deque,
std::forward_list, std::list and std::vector.  There were already
couple of xmethods existing for std::vector, but this patch adds more
over them.


Committed - thanks!


Re: [PATCH 2/3] libstdc++: Add put_time support.

2014-10-13 Thread Jonathan Wakely

On 15/04/14 23:20 +0200, Rüdiger Sonderfeld wrote:

Described in [ext.manip].

* libstdc++-v3/include/std/iomanip (_Put_time): New struct.
 (put_time): New manipulator.
 (operator<<): New overloaded function.
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/1.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/1.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/2.cc:
 New file.


The 27_io/manipulators/extended/put_time/char/2.cc and
27_io/manipulators/extended/put_time/wchar_t/2.cc tests fail for me.

i2.exe: 
/home/jwakely/src/gcc/gcc/libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:41:
 void test01(): Assertion `oss.str() == Son 1971' failed.
FAIL: 27_io/manipulators/extended/put_time/char/2.cc execution test



Re: [PATCH 1/2] libstdc++: Add std::align.

2014-10-13 Thread Jonathan Wakely

On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote:

On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote:

Fixed (the new function should be in namespace std!) and applied to
trunk. Thanks for the patch.


Thanks for merging and improving my patches!

If you have the time maybe you could also look at the put_time patch.  Unlike
get_time it does not require an ABI change.


I was about to close https://gcc.gnu.org/PR57350 as fixed, but the
test in the SO question it links to fails with your implementation
that I've just committed:

https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign




Re: [libstdc++] Add xmethods for array, deque, forward_list, list and vector.

2014-10-13 Thread Siva Chandra
On Mon, Oct 13, 2014 at 4:23 AM, Jonathan Wakely jwak...@redhat.com wrote:
 On 12/10/14 06:49 -0700, Siva Chandra wrote:

 Hello,

 Attached is a patch which adds xmethods for std::array, std::deque,
 std::forward_list, std::list and std::vector.  There were already
 couple of xmethods existing for std::vector, but this patch adds more
 over them.

 Committed - thanks!

That was fast. Thanks a lot!

- Siva Chandra


Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly

2014-10-13 Thread Jan Hubicka
 On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:
  Hi,
 
  The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
  high branch cost.
  Bootstrap and make check are in progress.
  The patch boosts (up to 2,5 times improve) several benchmarks compiled
  with -Ofast on Silvermont
  Spec2000:
  +5% gain on 173.applu
  +1% gain on 255.vortex
 
  Is it ok for trunk when pass bootstrap and make check?
 
 This is only a 20% increase - from 100 to 120.  I would instead suggest
 to explore doing this change unconditionally if it helps that much.

Agreed, I think the value of 100 was set a decade ago by Zdenek and me completely
artificially. I do not recall any serious tuning of this flag.

Note that I plan to update
https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so
PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than tree
changing its meaning somewhat.

Perhaps I could try to find time this or next week to update the patch so we do
not need to do the tuning twice.

Honza

 
 Richard.
 
  Thanks,
  Evgeny
 
  2014-10-10  Evgeny Stupachenko  evstu...@gmail.com
  * config/i386/i386.c (ix86_option_override_internal): Increase
  PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
  * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
  * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
  CPUs with high branch cost.
 
  diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
  index 6337aa5..5ac10eb 100644
  --- a/gcc/config/i386/i386.c
  +++ b/gcc/config/i386/i386.c
  @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
   opts-x_param_values,
   opts_set-x_param_values);
 
  +  /* Extend full peel max insns parameter for CPUs with high branch cost.  
  */
  +  if (TARGET_HIGH_BRANCH_COST)
  +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
  +  120,
  +  opts-x_param_values,
  +  opts_set-x_param_values);
  +
  +
 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
 if (opts-x_flag_prefetch_loop_arrays  0
  HAVE_prefetch
  diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
  index 2c64162..da0c57b 100644
  --- a/gcc/config/i386/i386.h
  +++ b/gcc/config/i386/i386.h
  @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
   #define TARGET_INTER_UNIT_CONVERSIONS \
  ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
   #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
  +#define TARGET_HIGH_BRANCH_COST
  ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
   #define TARGET_SCHEDULE
  ix86_tune_features[X86_TUNE_SCHEDULE]
   #define TARGET_USE_BT  ix86_tune_features[X86_TUNE_USE_BT]
   #define TARGET_USE_INCDEC  ix86_tune_features[X86_TUNE_USE_INCDEC]
  diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
  index b6b210e..04d8bf8 100644
  --- a/gcc/config/i386/x86-tune.def
  +++ b/gcc/config/i386/x86-tune.def
  @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit,
 m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
m_ATHLON_K8 | m_AMDFAM10)
 
  +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This 
  could be
  +   used to tune unroll, if-cvt, inline... heuristics.  */
  +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost,
  +  m_BONNELL | m_SILVERMONT | m_INTEL)
  +
   
  /*/
   /* Integer instruction selection tuning
*/
   
  /*/


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [9/n] Static constructors

2014-10-13 Thread Ilya Enkovich
On 09 Oct 13:04, Jeff Law wrote:
 On 10/08/14 13:10, Ilya Enkovich wrote:
 Hi,
 
 This patch introduces functions to handle static pointers and static bounds.
 
 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
 
  * tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New.
  (chkp_ctor_stmt_list): New.
  (chkp_register_var_initializer): New.
  (chkp_add_modification_to_stmt_list): New.
  (chkp_output_static_bounds): New.
  (chkp_finish_file): New.
  (chkp_instrument_function): Remove useless statements
  from static bounds constructors.
  * tree-chkp.h (chkp_register_var_initializer): New.
  (chkp_finish_file): New.
 Thanks for clarifying on the testcase.  I misunderstood the testing
 methodology and hence the results made no sense to me :-)
 
 
 Make the maximum statements a PARAM
 
 
 diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
 index b424af8..4b5a773 100644
 --- a/gcc/tree-chkp.c
 +++ b/gcc/tree-chkp.c
 @@ -394,6 +394,27 @@ static bool in_chkp_pass;
   #define CHKP_ZERO_BOUNDS_VAR_NAME __chkp_zero_bounds
   #define CHKP_NONE_BOUNDS_VAR_NAME __chkp_none_bounds
 
 +/* Static checker constructors may become very large and their
 +   compilation with optimization may take too much time.
 +   Therefore we put a limit to number of statements in one
 +   construcor.  Tests with 100 000 statically initialized
 s/construcor/constructor/
 
 
 
 +   static bounds initilization.  If VAR is added into
 +   bounds initlization list then 1 is returned. Otherwise
 s/initilization/initialization/
 
 +   into list of static initilizer statementes (passed in ARG).
 s/initilizer/initializer/
 
 This will be fine with the change to a PARAM and the nit spelling
 stuff fixed.
 
 jeff

Thanks for review!  Here is a fixed version.

Ilya
--
2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New.
(chkp_ctor_stmt_list): New.
(chkp_register_var_initializer): New.
(chkp_add_modification_to_stmt_list): New.
(chkp_output_static_bounds): New.
(chkp_finish_file): New.
(chkp_instrument_function): Remove useless statements
from static bounds constructors.
* tree-chkp.h (chkp_register_var_initializer): New.
(chkp_finish_file): New.
* doc/invoke.texi (chkp-max-ctor-size): New.
* params.def (PARAM_CHKP_MAX_CTOR_SIZE): New.


diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1d8ab03..8128dff 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10459,6 +10459,12 @@ is greater or equal to this number, use callbacks 
instead of inline checks.
 E.g. to disable inline code use
 @option{--param asan-instrumentation-with-call-threshold=0}.
 
+@item chkp-max-ctor-size
+Static constructors generated by Pointer Bounds Checker may become very
+large and significantly increase compile time at optimization level
+@option{-O1} and higher.  This parameter is a maximum number of statements
+in a single generated constructor.  Default value is 5000.
+
 @end table
 @end table
 
diff --git a/gcc/params.def b/gcc/params.def
index aefdd07..af490e0 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1099,6 +1099,12 @@ DEFPARAM (PARAM_UNINIT_CONTROL_DEP_ATTEMPTS,
  Maximum number of nested calls to search for control dependencies 
  during uninitialized variable analysis,
  1000, 1, 0)
+
+DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE,
+ chkp-max-ctor-size,
+ Maximum number of statements to be included into a single static 
+ constructor generated by Pointer Bounds Checker,
+ 5000, 100, 0)
 /*
 
 Local variables:
diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 0abe192..21c6138 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -65,6 +65,7 @@ along with GCC; see the file COPYING3.  If not see
 #include rtl.h /* For MEM_P, assign_temp.  */
 #include tree-dfa.h
 #include ipa-chkp.h
+#include params.h
 
 /*  Pointer Bounds Checker instruments code with memory checks to find
 out-of-bounds memory accesses.  Checks are performed by computing
@@ -394,6 +395,27 @@ static bool in_chkp_pass;
 #define CHKP_ZERO_BOUNDS_VAR_NAME __chkp_zero_bounds
 #define CHKP_NONE_BOUNDS_VAR_NAME __chkp_none_bounds
 
+/* Static checker constructors may become very large and their
+   compilation with optimization may take too much time.
+   Therefore we put a limit to number of statements in one
+   constructor.  Tests with 100 000 statically initialized
+   pointers showed following compilation times on Sandy Bridge
+   server (used -O2):
+   limit    100 => ~18 sec.
+   limit    300 => ~22 sec.
+   limit   1000 => ~30 sec.
+   limit   3000 => ~49 sec.
+   limit   5000 => ~55 sec.
+   limit  10000 => ~76 sec.
+   limit 100000 => ~532 sec.  */
+#define MAX_STMTS_IN_STATIC_CHKP_CTOR (PARAM_VALUE (PARAM_CHKP_MAX_CTOR_SIZE))
+
+struct chkp_ctor_stmt_list
+{
+  tree stmts;
+  int avail;
+};
+
 /* Return 1 if function FNDECL is 

Fix bootstrap/63496 (pasto in ipa-polymorphic-call-context.c)

2014-10-13 Thread Jan Hubicka
Hi,
this patch fixes pasto in ipa-polymorphic-call.c. Of course I was not intending 
to compare
offset+64 with offset ;)

Honza

PR bootstrap/63496
* ipa-polymorphic-call.c (extr_type_from_vtbl_ptr_store): Fix pasto.
Index: ipa-polymorphic-call.c
===
--- ipa-polymorphic-call.c  (revision 216141)
+++ ipa-polymorphic-call.c  (working copy)
@@ -1235,7 +1235,7 @@ extr_type_from_vtbl_ptr_store (gimple st
  if (dump_file)
fprintf (dump_file, wrong offset %i!=%i or size %i\n,
 (int)offset, (int)tci-offset, (int)size);
- return offset + GET_MODE_BITSIZE (Pmode) = offset
+ return offset + GET_MODE_BITSIZE (Pmode) = tci-offset
 || (max_size != -1
  tci-offset + GET_MODE_BITSIZE (Pmode)  offset + 
max_size)
 ? error_mark_node : NULL;


Re: [PATCH 1/2] libstdc++: Add std::align.

2014-10-13 Thread Jonathan Wakely

On 13/10/14 13:10 +0100, Jonathan Wakely wrote:

On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote:

On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote:

Fixed (the new function should be in namespace std!) and applied to
trunk. Thanks for the patch.


Thanks for merging and improving my patches!

If you have the time maybe you could also look at the put_time patch.  Unlike
get_time it does not require an ABI change.


I was about to close https://gcc.gnu.org/PR57350 as fixed, but the
test in the SO question it links to fails with your implementation
that I've just committed:

https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign


I'm testing this fix.

commit 60aa345d8122053f7c8ba7c743b458e799eb1455
Author: Jonathan Wakely jwak...@redhat.com
Date:   Mon Oct 13 14:02:39 2014 +0100

	PR libstdc++/57350
	* include/std/memory (align): Do not adjust correctly aligned address.
	* testsuite/20_util/align/2.cc: New.

diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index affc8b1..b5792ad 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -113,14 +113,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 inline void*
 align(size_t __align, size_t __size, void* __ptr, size_t __space) noexcept
 {
-  const size_t __diff = __align - reinterpret_castuintptr_t(__ptr) % __align;
-  if (__diff + __size = __space)
+  const auto __intptr = reinterpret_castuintptr_t(__ptr);
+  const auto __aligned = (__intptr - 1u + __align)  -__align;
+  const auto __diff = __aligned - __intptr;
+  if ((__size + __diff)  __space)
 return nullptr;
   else
 {
   __space -= __diff;
-  __ptr = static_castchar*(__ptr) + __diff;
-  return __ptr;
+  return __ptr = reinterpret_castvoid*(__aligned);
 }
 }
 
diff --git a/libstdc++-v3/testsuite/20_util/align/2.cc b/libstdc++-v3/testsuite/20_util/align/2.cc
new file mode 100644
index 000..efad56a
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/align/2.cc
@@ -0,0 +1,42 @@
+// { dg-options  -std=gnu++11  }
+
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+// C++11 [ptr.align] (20.6.5): std::align
+
+#include memory
+#include testsuite_hooks.h
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  int i = 0;
+  void* ptr = i;
+  auto space = sizeof(i);
+  auto p2 = std::align(alignof(i), space, ptr, space);
+  VERIFY( ptr == i );
+  VERIFY( p2 == i );
+  VERIFY(space == sizeof(i));
+}
+
+int main()
+{
+  test01();
+}


Re: [PATCH 3/5] IPA ICF pass

2014-10-13 Thread Martin Liška

On 10/11/2014 10:19 AM, Jan Hubicka wrote:


After few days of measurement and tuning, I was able to get numbers to the 
following shape:
Execution times (seconds)
  phase setup :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
   1412 kB ( 0%) ggc
  phase opt and generate  :  27.83 (59%) usr   0.66 (19%) sys  28.52 (37%) wall 
1028813 kB (24%) ggc
  phase stream in :  16.90 (36%) usr   0.63 (18%) sys  17.60 (23%) wall 
3246453 kB (76%) ggc
  phase stream out:   2.76 ( 6%) usr   2.19 (63%) sys  31.34 (40%) wall 
  2 kB ( 0%) ggc
  callgraph optimization  :   0.36 ( 1%) usr   0.00 ( 0%) sys   0.35 ( 0%) wall 
 40 kB ( 0%) ggc
  ipa dead code removal   :   3.31 ( 7%) usr   0.01 ( 0%) sys   3.25 ( 4%) wall 
  0 kB ( 0%) ggc
  ipa virtual call target :   3.69 ( 8%) usr   0.03 ( 1%) sys   3.80 ( 5%) wall 
 21 kB ( 0%) ggc
  ipa devirtualization:   0.12 ( 0%) usr   0.00 ( 0%) sys   0.15 ( 0%) wall 
  13704 kB ( 0%) ggc
  ipa cp  :   1.11 ( 2%) usr   0.07 ( 2%) sys   1.17 ( 2%) wall 
 188558 kB ( 4%) ggc
  ipa inlining heuristics :   8.17 (17%) usr   0.14 ( 4%) sys   8.27 (11%) wall 
 494738 kB (12%) ggc
  ipa comdats :   0.12 ( 0%) usr   0.00 ( 0%) sys   0.12 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa lto gimple in   :   1.86 ( 4%) usr   0.40 (11%) sys   2.20 ( 3%) wall 
 537970 kB (13%) ggc
  ipa lto gimple out  :   0.19 ( 0%) usr   0.08 ( 2%) sys   0.27 ( 0%) wall 
  2 kB ( 0%) ggc
  ipa lto decl in :  12.20 (26%) usr   0.37 (11%) sys  12.64 (16%) wall 
2441687 kB (57%) ggc
  ipa lto decl out:   2.51 ( 5%) usr   0.21 ( 6%) sys   2.71 ( 3%) wall 
  0 kB ( 0%) ggc
  ipa lto constructors in :   0.13 ( 0%) usr   0.02 ( 1%) sys   0.17 ( 0%) wall 
  15692 kB ( 0%) ggc
  ipa lto constructors out:   0.03 ( 0%) usr   0.00 ( 0%) sys   0.03 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa lto cgraph I/O  :   0.54 ( 1%) usr   0.09 ( 3%) sys   0.63 ( 1%) wall 
 407182 kB (10%) ggc
  ipa lto decl merge  :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.34 ( 2%) wall 
   8220 kB ( 0%) ggc
  ipa lto cgraph merge:   1.00 ( 2%) usr   0.00 ( 0%) sys   1.00 ( 1%) wall 
  14605 kB ( 0%) ggc
  whopr wpa   :   0.92 ( 2%) usr   0.00 ( 0%) sys   0.89 ( 1%) wall 
  1 kB ( 0%) ggc
  whopr wpa I/O   :   0.01 ( 0%) usr   1.90 (55%) sys  28.31 (37%) wall 
  0 kB ( 0%) ggc
  whopr partitioning  :   2.81 ( 6%) usr   0.01 ( 0%) sys   2.83 ( 4%) wall 
   4943 kB ( 0%) ggc
  ipa reference   :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.35 ( 2%) wall 
  0 kB ( 0%) ggc
  ipa profile :   0.20 ( 0%) usr   0.01 ( 0%) sys   0.21 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa pure const  :   1.62 ( 3%) usr   0.00 ( 0%) sys   1.63 ( 2%) wall 
  0 kB ( 0%) ggc
  ipa icf :   2.65 ( 6%) usr   0.02 ( 1%) sys   2.68 ( 3%) wall 
   1352 kB ( 0%) ggc
  inline parameters   :   0.00 ( 0%) usr   0.01 ( 0%) sys   0.00 ( 0%) wall 
  0 kB ( 0%) ggc
  tree SSA rewrite:   0.11 ( 0%) usr   0.01 ( 0%) sys   0.08 ( 0%) wall 
  18919 kB ( 0%) ggc
  tree SSA other  :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall 
  0 kB ( 0%) ggc
  tree SSA incremental:   0.24 ( 1%) usr   0.01 ( 0%) sys   0.32 ( 0%) wall 
  11325 kB ( 0%) ggc
  tree operand scan   :   0.15 ( 0%) usr   0.02 ( 1%) sys   0.18 ( 0%) wall 
 116283 kB ( 3%) ggc
  dominance frontiers :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.02 ( 0%) wall 
  0 kB ( 0%) ggc
  dominance computation   :   0.13 ( 0%) usr   0.01 ( 0%) sys   0.16 ( 0%) wall 
  0 kB ( 0%) ggc
  varconst:   0.01 ( 0%) usr   0.02 ( 1%) sys   0.01 ( 0%) wall 
  0 kB ( 0%) ggc
  loop fini   :   0.02 ( 0%) usr   0.00 ( 0%) sys   0.04 ( 0%) wall 
  0 kB ( 0%) ggc
  unaccounted todo:   0.55 ( 1%) usr   0.00 ( 0%) sys   0.56 ( 1%) wall 
  0 kB ( 0%) ggc
  TOTAL :  47.49 3.4877.46
4276682 kB

and I was able to reduce function bodies loaded in WPA to 35% (from previous 
55%). The main problem


35% means that 35% of all function bodies are compared with something else? 
That feels pretty high.
but overall numbers are not so terrible.


Currently, the pass is able to merge 32K functions. As you know, we group 
functions to so called classes.
According to stats, average non-singular class size contains at the end of 
comparison 7.39 candidates and we
have 5K such functions. Because we load body for each candidate in such groups, 
it gives us minimum number
of loaded bodies: 37K. As we load 70K function, we have still place to improve. 
But I guess WPA body-less
comparison is quite efficient.




with speed was hidden in work list for congruence classes, where hash_set was 
used. I chose the data
structure to support delete operation, but it was really slow. Thus, hash_set 
was replaced with linked list
and a flag is used to identify if a set is 

Re: [RFC: Patch, PR 60102] [4.9/4.10 Regression] powerpc fp-bit ices@dwf_regno

2014-10-13 Thread Ulrich Weigand
Maciej W. Rozycki wrote:
 On Thu, 9 Oct 2014, Maciej W. Rozycki wrote:
 
   Seeing Rohit got good results it has struck me that perhaps one of the 
  patches I had previously reverted, to be able to compile GCC in the first 
  place, interfered with this fix -- I backed out all the subsequent patches 
  to test yours and Rohit's by themselves only.  And it was actually the 
  case, with this change:
  
  2013-05-21  Christian Bruel  christian.br...@st.com
  
  * dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for
  spanning registers. LEAF_REG_REMAP is supported only for contiguous
  registers. Set register size out of the PARALLEL loop.
  
  back in place, in addition to your fix, I get an all-passed score for 
  gdb.base/store.exp.  So your change looks good and my decision to back out 
  the other patches unfortunate.  I'll yet run full e500v2 testing now to 
  double check, and let you know what the results are, within a couple of 
  hours if things work well.
 
  It took a bit more because I saw some regressions that I wanted to 
 investigate.  In the end they turned out intermittent and the failures 
 happen sometimes whether your change is applied or not.  So I'm fine with 
 your change, thanks for your work and patience.

Thanks for verifying!

David, is the patch OK to commit now?

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



Re: [PATCH 3/5] IPA ICF pass

2014-10-13 Thread Jan Hubicka
 
 35% means that 35% of all function bodies are compared with something else? 
 That feels pretty high.
 but overall numbers are not so terrible.
 
 Currently, the pass is able to merge 32K functions. As you know, we group 
 functions to so called classes.
 According to stats, average non-singular class size contains at the end of 
 comparison 7.39 candidates and we
 have 5K such functions. Because we load body for each candidate in such 
 groups, it gives us minimum number
 of loaded bodies: 37K. As we load 70K function, we have still place to 
 improve. But I guess WPA body-less
 comparison is quite efficient.

OK, that seems reasonable.
 
 
 with speed was hidden in work list for congruence classes, where hash_set 
 was used. I chose the data
 structure to support delete operation, but it was really slow. Thus, 
 hash_set was replaced with linked list
 and a flag is used to identify if a set is removed or not.
 
 Interesting, I would not expect bottleneck in a congruence solving :)
 
 The problem was just the hash_set that showed to be slow data structure for a 
 set of operations needed
 in congruence solving.
 
 
 I have no clue how complicated can it be to implement release_body function 
 to an operation that
 really releases the memory?
 
 I suppose one can keep the caches from streamer and free trees read.  Freeing
 gimple statements, cfg should be relatively easy.
 
 Lets however first try to tune the implementation rather than try to this 
 hack
 implemented. Explicit ggc_free calls traditionally tended to cause some 
 negative
 reactions wrt memory fragmentation concerns.
 
 Agree with suggested approach.

In future we actually may keep the duplicated functions in WPA memory and use 
corresponding body
whenever the function is inlined to avoid disturbing debug info more than 
needed.

Honza
 
 
 
 Markus' problem with -fprofile-use has been removed, IPA-ICF is preceding 
 devirtualization pass. I hope it is fine?
 
 Yes, I think devirtualization should actually work better with identical
 virtual methods merged.  We just need to be sure it sees through the newly
 introduced aliases (there should be no thunks for virtual methods)
 
 Thanks,
 Martin
 
 
 Honza
 


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [11/n] Optimization helpers

2014-10-13 Thread Ilya Enkovich
On 10 Oct 10:14, Jeff Law wrote:
 On 10/10/14 08:24, Ilya Enkovich wrote:
 On 09 Oct 12:09, Jeff Law wrote:
 On 10/08/14 13:16, Ilya Enkovich wrote:
 Hi,
 
 This patch introduces structures and manipulation functions used by simple 
 checker optimizations.  Structures are used to hold checks information - 
 type of check and checked address in a polynomial form.
 
 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
 
* tree-chkp.c (check_type): New.
(pol_item): New.
(address_t): New.
(check_info): New.
(bb_checks): New.
(chkp_pol_item_compare): New.
(chkp_pol_find): New.
(chkp_extend_const): New.
(chkp_add_addr_item): New.
(chkp_sub_addr_item): New.
(chkp_add_addr_addr): New.
(chkp_sub_addr_addr): New.
(chkp_mult_addr): New.
(chkp_is_constant_addr): New.
(chkp_print_addr): New.
(chkp_collect_addr_value): New.
(chkp_collect_value): New.
(chkp_fill_check_info): New.
 
 
 +/* Find plynomial item in ADDR with var equal to VAR
 s/plynomial/polynomial/
 
 With nit fixed and functions moved into whatever new file gets
 created for the optimization work  this will be OK.
 jeff
 
 Thanks for review!  Here is a fixed version.
 
 Ilya
 --
 2014-10-10  Ilya Enkovich  ilya.enkov...@intel.com
 
  * tree-chkp-opt.c: New.
  * Makefile.in (OBJS): Add tree-chkp-opt.o.
 
 
 diff --git a/gcc/Makefile.in b/gcc/Makefile.in
 index d8c8488..cd45b29 100644
 --- a/gcc/Makefile.in
 +++ b/gcc/Makefile.in
 @@ -1389,6 +1389,7 @@ OBJS = \
  tree-parloops.o \
  tree-phinodes.o \
  tree-chkp.o \
 +tree-chkp-opt.o \
  tree-predcom.o \
  tree-pretty-print.o \
  tree-profile.o \
 diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c
 new file mode 100644
 index 000..103c4bb
 --- /dev/null
 +++ b/gcc/tree-chkp-opt.c
 @@ -0,0 +1,463 @@
 +/* Pointer Bounds Checker optimization pass.
 +   Copyright (C) 2014 Free Software Foundation, Inc.
 +   Contributed by Ilya Enkovich (ilya.enkov...@intel.com)
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free
 +Software Foundation; either version 3, or (at your option) any later
 +version.
 +
 +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 +WARRANTY; without even the implied warranty of MERCHANTABILITY or
 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 +for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +http://www.gnu.org/licenses/.  */
 +
 +#include config.h
 +#include system.h
 +#include coretypes.h
 +#include tree-core.h
 +#include stor-layout.h
 +#include varasm.h
 +#include tree.h
 +#include target.h
 +#include tree-iterator.h
 +#include tree-cfg.h
 +#include langhooks.h
 +#include tree-pass.h
 +#include hashtab.h
 +#include diagnostic.h
 +#include ggc.h
 +#include output.h
 +#include internal-fn.h
 +#include is-a.h
 +#include predict.h
 +#include cfgloop.h
 +#include stringpool.h
 +#include tree-ssa-alias.h
 +#include tree-ssanames.h
 +#include tree-ssa-operands.h
 +#include tree-ssa-address.h
 +#include tree-ssa.h
 +#include ipa-inline.h
 +#include basic-block.h
 +#include tree-ssa-loop-niter.h
 +#include gimple-expr.h
 +#include gimple.h
 +#include tree-phinodes.h
 +#include gimple-ssa.h
 +#include ssa-iterators.h
 +#include gimple-pretty-print.h
 +#include gimple-iterator.h
 +#include gimplify.h
 +#include gimplify-me.h
 +#include print-tree.h
 +#include expr.h
 +#include tree-ssa-propagate.h
 +#include gimple-fold.h
 +#include gimple-walk.h
 +#include tree-dfa.h
 +#include tree-chkp.h
 Thanks.  Looks good.
 
 As a follow-up, can you try to trim down what appear to be the
 over-zealous includes?   It's a minor thing, but we are trying to be
 a bit saner about that kind of stuff than we've been in the past.
 
 If you've already done that, then, well, we've clearly still got a
 ways to go.  For example, I can't see why you'd need output.h here
 :-0
 
 
 Jeff

Thanks for review!  This includes list is from tree-chkp.c and surely is 
reducible.  I also revisited tree-chkp.c and removed few includes from there.  
Here is a new version.

Ilya
--
2014-10-10  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp-opt.c: New.
* Makefile.in (OBJS): Add tree-chkp-opt.o.


diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d8c8488..cd45b29 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1389,6 +1389,7 @@ OBJS = \
tree-parloops.o \
tree-phinodes.o \
tree-chkp.o \
+   tree-chkp-opt.o \
tree-predcom.o \
tree-pretty-print.o \
tree-profile.o \
diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c
new file mode 100644
index 000..08be848
--- /dev/null
+++ b/gcc/tree-chkp-opt.c
@@ -0,0 +1,447 @@
+/* Pointer Bounds Checker optimization pass.
+   

[C++ patch] PR tree-optimization/62053 (VLA arrays)

2014-10-13 Thread Jan Hubicka
Hi,
this patch triggers assert in tree-inline that verify that array type and its 
main variant have same size
(as pointer).  In this case we have two types that do have same size, but the 
expression is different.
It is created in both cases by layout_type because C++ FE calls layout_type 
before linking the variants.
I think it is safe to move the call and avoid the duplication.

Bootstrapped/regtested x86_64-linux, OK?

Honza

PR tree-optimization/62053
* g++.dg/torture/pr62053.C: New testcase.
* tree.c: Avoid ordering issue with layout_type.

Index: testsuite/g++.dg/torture/pr62053.C
===
--- testsuite/g++.dg/torture/pr62053.C  (revision 0)
+++ testsuite/g++.dg/torture/pr62053.C  (revision 0)
@@ -0,0 +1,7 @@
+// { dg-do compile }
+typedef int GType;
+class YGTable
+{
+  YGTable () { GType a[m_fn1 ()]; }
+  int m_fn1 ();
+};
Index: cp/tree.c
===
--- cp/tree.c   (revision 216145)
+++ cp/tree.c   (working copy)
@@ -863,12 +863,12 @@ build_cplus_array_type (tree elt_type, t
{
  t = build_min_array_type (elt_type, index_type);
  set_array_type_canon (t, elt_type, index_type);
- if (!dependent)
-   layout_type (t);
 
  TYPE_MAIN_VARIANT (t) = m;
  TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (m);
  TYPE_NEXT_VARIANT (m) = t;
+ if (!dependent)
+   layout_type (t);
}
 }
 


[committed] Fix comment about RUNTESTFLAGS and parallelization

2014-10-13 Thread Christophe Lyon
Hi,

After Jakub's pre-approval
https://gcc.gnu.org/ml/gcc/2014-10/msg00084.html

I have committed the small attached patch as r216147.

2014-10-13  Christophe Lyon  christophe.l...@linaro.org

* Makefile.in: (check-%): Update comment, as RUNTESTFLAGS no
longer impact parallelization.

Christophe.
Index: gcc/Makefile.in
===
--- gcc/Makefile.in (revision 216146)
+++ gcc/Makefile.in (revision 216147)
@@ -3632,10 +3632,9 @@
$(check_p_numbers)))
 
 # For parallelized check-% targets, this decides whether parallelization
-# is desirable (if -jN is used and RUNTESTFLAGS doesn't contain anything
-# but optional --target_board or --extra_opts arguments).  If desirable,
-# recursive make is run with check-parallel-$lang{,1,2,3,4,5} etc. goals,
-# which can be executed in parallel, as they are run in separate directories.
+# is desirable (if -jN is used).  If desirable, recursive make is run with
+# check-parallel-$lang{,1,2,3,4,5} etc. goals, which can be executed in
+# parallel, as they are run in separate directories.
 # check-parallel-$lang{,1,2,3,4,5} etc. goals invoke runtest with
 # GCC_RUNTEST_PARALLELIZE_DIR var in the environment and runtest_file_p
 # dejaGNU procedure is overridden to additionally synchronize through


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [8/n] Remove useless builtin calls

2014-10-13 Thread Ilya Enkovich
On 10 Oct 10:11, Jeff Law wrote:
 On 10/10/14 08:52, Ilya Enkovich wrote:
 
 THanks, Jeff
 
 With this code we remove user builtins calls coming from source code.
 E.g.:
 
 p2 = (int *)__bnd_init_ptr_bounds (p1); *p2 = 0;
 
 which means p2 has value of p1 but has default bounds and following
 store is unchecked.  These calls are important for instrumentation
 but useless after instrumentation.  I don't think it is a part of
 checker optimizer because it doesn't optimize instrumentation code.
 Also this transformation is trivial enough for O0 and checker
 optimizer works starting from O2.
 
 Below is a version fixed according to Richard's comments.
 
 Thanks, Ilya -- 2014-10-10  Ilya Enkovich  ilya.enkov...@intel.com
 
 * tree-chkp.c (chkp_remove_useless_builtins): New. (chkp_execute):
 Remove useless calls to Pointer Bounds Checker builtins.
 Tests instrumentation are still needed.  With some basic tests and
 instrumentation this will be OK.
 
 I hate to be harping tests, but few developers are going to be
 familiar with the MPX and related infrastructure and those tests are
 critical to helping them know when they break something.
 
 Similarly if the plan is to iterate on improving things, then those
 basic functionality tests will ultimately save time as you can smoke
 test before running larger benchmarks.
 
 
 jeff

Here is a version with tests added.

Thanks,
Ilya
--
gcc/

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp.c (chkp_remove_useless_builtins): New.
(chkp_execute): Remove useless calls to Pointer Bounds
Checker builtins.

gcc/testsuite

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* gcc.target/i386/chkp-builtins-1.c: New.
* gcc.target/i386/chkp-builtins-2.c: New.
* gcc.target/i386/chkp-builtins-3.c: New.
* gcc.target/i386/chkp-builtins-4.c: New.


diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c 
b/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c
new file mode 100644
index 000..bcc1198
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */
+/* { dg-final { scan-tree-dump-not bnd_init_ptr_bounds chkp } } */
+
+void *
+chkp_test (void *p)
+{
+  return __builtin___bnd_init_ptr_bounds (p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c 
b/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c
new file mode 100644
index 000..1f4a244
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */
+/* { dg-final { scan-tree-dump-not bnd_copy_ptr_bounds chkp } } */
+
+void *
+chkp_test (void *p, void *q)
+{
+  return __builtin___bnd_copy_ptr_bounds (p, q);
+}
diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c 
b/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c
new file mode 100644
index 000..ea54ede
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */
+/* { dg-final { scan-tree-dump-not bnd_set_ptr_bounds chkp } } */
+
+void *
+chkp_test (void *p)
+{
+  return __builtin___bnd_set_ptr_bounds (p, 10);
+}
diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c 
b/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c
new file mode 100644
index 000..cee780b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */
+/* { dg-final { scan-tree-dump-not bnd_null_ptr_bounds chkp } } */
+
+void *
+chkp_test (void *p)
+{
+  return __builtin___bnd_null_ptr_bounds (p);
+}
diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 9be153a..5957e45 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -3800,6 +3800,44 @@ chkp_instrument_function (void)
}
 }
 
+/* Find init/null/copy_ptr_bounds calls and replace them
+   with assignments.  It should allow better code
+   optimization.  */
+
+static void
+chkp_remove_useless_builtins ()
+{
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+
+  FOR_EACH_BB_FN (bb, cfun)
+{
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi))
+{
+ gimple stmt = gsi_stmt (gsi);
+ tree fndecl;
+ enum built_in_function fcode;
+
+ /* Find builtins returning first arg and replace
+them with assignments.  */
+ if (gimple_code (stmt) == GIMPLE_CALL
+  && (fndecl = gimple_call_fndecl (stmt))
+  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
+  && (fcode = DECL_FUNCTION_CODE (fndecl))
+  && (fcode == BUILT_IN_CHKP_INIT_PTR_BOUNDS
+ || fcode == BUILT_IN_CHKP_NULL_PTR_BOUNDS
+ || fcode == BUILT_IN_CHKP_COPY_PTR_BOUNDS
+ || fcode == 

[PATCH][ARM]Add ACLE 2.0 predefined marco __ARM_FEATURE_IDIV

2014-10-13 Thread Renlin Li

Hi all,

This is a simple patch to add missing __ARM_FEATURE_IDIV__ predefined 
macro (ACLE 2.0) into TARGET_CPU_CPP_BUILTINS.
Is it Okay to commit?


gcc/ChangeLog:

2014-10-13  Renlin Li  renlin...@arm.com

* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add ACLE 2.0 predefined
marco __ARM_FEATURE_IDIV__.
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3623c70..941d1b4 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -164,7 +164,10 @@ extern char arm_arch_name[];
 	builtin_define (__ARM_EABI__);		\
 	  }		\
 	if (TARGET_IDIV)\
-	  builtin_define (__ARM_ARCH_EXT_IDIV__);	\
+ {  \
+builtin_define (__ARM_ARCH_EXT_IDIV__);	\
+builtin_define (__ARM_FEATURE_IDIV__);	\
+ }
 } while (0)
 
 #include config/arm/arm-opts.h

Re: [PATCH 1/2] libstdc++: Add std::align.

2014-10-13 Thread Jonathan Wakely

On 13/10/14 14:04 +0100, Jonathan Wakely wrote:

On 13/10/14 13:10 +0100, Jonathan Wakely wrote:

On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote:

On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote:

Fixed (the new function should be in namespace std!) and applied to
trunk. Thanks for the patch.


Thanks for merging and improving my patches!

If you have the time maybe you could also look at the put_time patch.  Unlike
get_time it does not require an ABI change.


I was about to close https://gcc.gnu.org/PR57350 as fixed, but the
test in the SO question it links to fails with your implementation
that I've just committed:

https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign


I'm testing this fix.


Committed to trunk.


commit 60aa345d8122053f7c8ba7c743b458e799eb1455
Author: Jonathan Wakely jwak...@redhat.com
Date:   Mon Oct 13 14:02:39 2014 +0100

PR libstdc++/57350
* include/std/memory (align): Do not adjust correctly aligned address.
* testsuite/20_util/align/2.cc: New.

diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index affc8b1..b5792ad 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -113,14 +113,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
inline void*
align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept
{
-  const size_t __diff = __align - reinterpret_cast<uintptr_t>(__ptr) % __align;
-  if (__diff + __size >= __space)
+  const auto __intptr = reinterpret_cast<uintptr_t>(__ptr);
+  const auto __aligned = (__intptr - 1u + __align) & -__align;
+  const auto __diff = __aligned - __intptr;
+  if ((__size + __diff) > __space)
return nullptr;
  else
{
  __space -= __diff;
-  __ptr = static_cast<char*>(__ptr) + __diff;
-  return __ptr;
+  return __ptr = reinterpret_cast<void*>(__aligned);
}
}

diff --git a/libstdc++-v3/testsuite/20_util/align/2.cc 
b/libstdc++-v3/testsuite/20_util/align/2.cc
new file mode 100644
index 000..efad56a
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/align/2.cc
@@ -0,0 +1,42 @@
+// { dg-options  -std=gnu++11  }
+
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License as published by the Free Software
+// Foundation; either version 3, or (at your option) any later
+// version.
+
+// This library is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+// C++11 [ptr.align] (20.6.5): std::align
+
+#include <memory>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  int i = 0;
+  void* ptr = &i;
+  auto space = sizeof(i);
+  auto p2 = std::align(alignof(i), space, ptr, space);
+  VERIFY( ptr == &i );
+  VERIFY( p2 == &i );
+  VERIFY(space == sizeof(i));
+}
+
+int main()
+{
+  test01();
+}




[PATCH][AARCH64]Remove unused variable and marco

2014-10-13 Thread Renlin Li

Hi all,

This is a simple patch to remove unused variables and macro.
Is it Okay to commit?

gcc/ChangeLog:

2014-10-13  Renlin Li  renlin...@arm.com

* config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete.

Regards,
Renlin Li

From 88b6dc7fccf70c140070fbc5472d9bc1a8590754 Mon Sep 17 00:00:00 2001
From: Renlin Li renlin...@arm.com
Date: Mon, 13 Oct 2014 09:30:57 +0100
Subject: [PATCH 2/6] remove unused ARM_DEFAULT_PCS marco and arm_pcs_variant

---
 gcc/config/aarch64/aarch64.h |4 
 1 file changed, 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 73b68ee..77f4511 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -578,11 +578,7 @@ enum arm_pcs
 };
 
 
-extern enum arm_pcs arm_pcs_variant;
 
-#ifndef ARM_DEFAULT_PCS
-#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64
-#endif
 
 /* We can't use enum machine_mode inside a generator file because it
hasn't been created yet; we shouldn't be using any code that
-- 
1.7.9.5


[jit] Avoid overwhelming expect's buffer

2014-10-13 Thread David Malcolm
Committed to git branch dmalcolm/jit:

expect has a default buffer size of 2000 bytes (match_max).

If the testsuite programs send large amounts of data to stdout/stderr,
they could overwhelm this buffer.

Avoid this.

gcc/testsuite/ChangeLog.jit:
* jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine
note from dejagnu.h to new function dejagnu_note so that we can
make note be threadsafe.
(set_options): Don't enable GCC_JIT_BOOL_OPTION_DUMP_SUMMARY,
since it can generate large amounts of output that could overwhelm
expect's buffer.
* jit.dg/test-dot-product.c (verify_code): Use note rather than
printf, to give DejaGnu more chances to parse this log data,
rather than overflowing its buffer.
* jit.dg/test-factorial.c (verify_code): Likewise.
* jit.dg/test-fibonacci.c (verify_code): Likewise.
* jit.dg/test-fuzzer.c (main): Likewise.
* jit.dg/test-nested-loops.c (verify_code): Likewise.
* jit.dg/test-sum-of-squares.c (verify_code): Likewise.
* jit.dg/test-threads.c (note): New function, adding thread-safety
on top of dejagnu_note, the latter being the implementation
found in dejagnu.h.
(run_threaded_test): Use note rather than printf.
---
 gcc/testsuite/ChangeLog.jit| 21 +
 gcc/testsuite/jit.dg/harness.h |  4 +++-
 gcc/testsuite/jit.dg/test-dot-product.c|  2 +-
 gcc/testsuite/jit.dg/test-factorial.c  |  2 +-
 gcc/testsuite/jit.dg/test-fibonacci.c  |  2 +-
 gcc/testsuite/jit.dg/test-fuzzer.c |  4 ++--
 gcc/testsuite/jit.dg/test-nested-loops.c   |  2 +-
 gcc/testsuite/jit.dg/test-sum-of-squares.c |  2 +-
 gcc/testsuite/jit.dg/test-threads.c| 26 +-
 9 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/gcc/testsuite/ChangeLog.jit b/gcc/testsuite/ChangeLog.jit
index 798f7c9..7703212 100644
--- a/gcc/testsuite/ChangeLog.jit
+++ b/gcc/testsuite/ChangeLog.jit
@@ -1,3 +1,24 @@
+2014-10-13  David Malcolm  dmalc...@redhat.com
+
+   * jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine
+   note from dejagnu.h to new function dejagnu_note so that we can
+   make note be threadsafe.
+   (set_options): Don't enable GCC_JIT_BOOL_OPTION_DUMP_SUMMARY,
+   since it can generate large amounts of output that could overwhelm
+   expect's buffer.
+   * jit.dg/test-dot-product.c (verify_code): Use note rather than
+   printf, to give DejaGnu more chances to parse this log data,
+   rather than overflowing its buffer.
+   * jit.dg/test-factorial.c (verify_code): Likewise.
+   * jit.dg/test-fibonacci.c (verify_code): Likewise.
+   * jit.dg/test-fuzzer.c (main): Likewise.
+   * jit.dg/test-nested-loops.c (verify_code): Likewise.
+   * jit.dg/test-sum-of-squares.c (verify_code): Likewise.
+   * jit.dg/test-threads.c (note): New function, adding thread-safety
+   on top of dejagnu_note, the latter being the implementation
+   found in dejagnu.h.
+   (run_threaded_test): Use note rather than printf.
+
 2014-10-07  David Malcolm  dmalc...@redhat.com
 
* jit.dg/jit.exp (jit-dg-test): Prepend the installed bindir to
diff --git a/gcc/testsuite/jit.dg/harness.h b/gcc/testsuite/jit.dg/harness.h
index cee42f3..f326891 100644
--- a/gcc/testsuite/jit.dg/harness.h
+++ b/gcc/testsuite/jit.dg/harness.h
@@ -22,6 +22,7 @@
 #ifdef MAKE_DEJAGNU_H_THREADSAFE
 #define pass dejagnu_pass
 #define fail dejagnu_fail
+#define note dejagnu_note
 #endif
 
 #include dejagnu.h
@@ -29,6 +30,7 @@
 #ifdef MAKE_DEJAGNU_H_THREADSAFE
 #undef pass
 #undef fail
+#undef note
 #endif
 
 static char test[1024];
@@ -165,7 +167,7 @@ static void set_options (gcc_jit_context *ctxt, const char 
*argv0)
   gcc_jit_context_set_bool_option (
 ctxt,
 GCC_JIT_BOOL_OPTION_DUMP_SUMMARY,
-1);
+0);
 }
 
 #ifndef TEST_ESCHEWS_TEST_JIT
diff --git a/gcc/testsuite/jit.dg/test-dot-product.c 
b/gcc/testsuite/jit.dg/test-dot-product.c
index 2cde66d..a41109a 100644
--- a/gcc/testsuite/jit.dg/test-dot-product.c
+++ b/gcc/testsuite/jit.dg/test-dot-product.c
@@ -123,7 +123,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
   CHECK_NON_NULL (my_dot_product);
   double test_array[] = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.};
   double val = my_dot_product (10, test_array, test_array);
-  printf("my_dot_product returned: %f\n", val);
+  note ("my_dot_product returned: %f", val);
   CHECK_VALUE (val, 385.0);
 }
 
diff --git a/gcc/testsuite/jit.dg/test-factorial.c 
b/gcc/testsuite/jit.dg/test-factorial.c
index eecd831..b2aaece 100644
--- a/gcc/testsuite/jit.dg/test-factorial.c
+++ b/gcc/testsuite/jit.dg/test-factorial.c
@@ -97,7 +97,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
 (my_factorial_fn_type)gcc_jit_result_get_code (result, my_factorial);
   CHECK_NON_NULL (my_factorial);
   int val = my_factorial 

PR 62127 (VLA arrays and remap_type)

2014-10-13 Thread Jan Hubicka
Hi,
this patch fixes bug I introduced in 
2014-06-28  Jan Hubicka  hubi...@ucw.cz

* tree-inline.c (remap_type_1): Do not duplicate fields
that are shared in between type and its main variant.

when adding the conditionals I somehow dropped else path of the test
copying TREE_TYPE of array.

Bootstrapped/regtested x86_64-linux, committed as obvious.

Honza

PR tree-optimization/62127
* g++.dg/torture/pr62127.C: New testcase.
* tree.c (remap_type_1): When remapping array, remap
also its type.

Index: testsuite/g++.dg/torture/pr62127.C
===
--- testsuite/g++.dg/torture/pr62127.C  (revision 0)
+++ testsuite/g++.dg/torture/pr62127.C  (revision 0)
@@ -0,0 +1,11 @@
+// { dg-do compile }
+struct A
+{
+  A(int);
+};
+
+A::A(int i)
+{
+  int x[1][i];
+  x[0][0] = 0;
+}
Index: tree-inline.c
===
--- tree-inline.c   (revision 216145)
+++ tree-inline.c   (working copy)
@@ -496,6 +496,8 @@ remap_type_1 (tree type, copy_body_data
   if (TYPE_MAIN_VARIANT (new_tree) != new_tree
       && TREE_TYPE (type) == TREE_TYPE (TYPE_MAIN_VARIANT (type)))
TREE_TYPE (new_tree) = TREE_TYPE (TYPE_MAIN_VARIANT (new_tree));
+  else
+   TREE_TYPE (new_tree) = remap_type (TREE_TYPE (new_tree), id);
 
   if (TYPE_MAIN_VARIANT (new_tree) != new_tree)
{


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [12/n] Optimize string functions

2014-10-13 Thread Ilya Enkovich
On 10 Oct 10:25, Jeff Law wrote:
 On 10/10/14 08:19, Ilya Enkovich wrote:
 So is the purpose here to expose the checks that would normally be
 done in the mem* routines to their caller in the hopes that doing
 so will expose redundant checks?  Or is there some other reason?
 
 There are few reasons to replace instrumented string functions:
 
 1.  As you said following redundant checks elimination may remove
 checks for some string functions
 2.  By default functions like memcpy
 should assume pointers are copied and copy bounds.  If we know
 pointers are not copied, we may use faster version with no bounds
 copy
 3.  If we avoid both checks and bounds copy then it is a
 candidate for existing string function calls inlining in expand pass
 Perfect.  So this belongs in a comment in the code.
 
 I thought tests will be added later.
 Did you already post them?  There's been so many patches I'm
 starting to lose track :-)

I didn't post tests yet.

 
 For future reference, when you break a submission down into logical
 hunks, including the tests in those logical hunks helps.  I realize
 the MPX work isn't as well suited for that kind of breakdown, but
 it's worth keeping in mind.
 
 
  I have ~250 tests to commit.
 Will check I have tests for optimizations.
 Excellent.
 
 
  BTW this particular
 optimization can't work until we have instrumented builtin calls.
 Yea, hopefully we'll get to that before close of stage1.
 
 It's a nit, but I'd tend to write that as:
 
 if (!fndecl_nochk) continue;
 
 fndecl = fndecl_nochk gimple_call_set_fndecl (stmt, fndecl);
 
 
 
 
 There is one more assignment to fndecl above which makes your version
 nonequivalent.
 I had assumed the gimple_call_set_fndecl was a nop if we didn't
 change the fndecl.  Is that not the case?

Right.  But (!fndecl_nochk) doesn't mean we didn't change fndecl because there 
is another set to fndecl above.

Ilya

 
 I'm a bit surprised we don't have this kind of capability already
 broken out.  But assuming that's the case, can you go ahead and
 break that out into its own little helper function?You don't
 need to find all the cases where we're doing this kind of thing
 today, just create the helper function and use it in your new
 code.
 
 I could miss such function  (looked in cfg hooks and tree-cfg.h).
 Hopefully someone will correct me if it is so.
 Thanks.  I suspect everyone has just done their own implementation
 inline like you did.  It's something I'll be keeping my eye out for
 in others' code so we can funnel everyone into your new function.
 ISTM many speculative optimizations are going to need that kind of
 helper.
 
 Taking into account not instrumented builtin calls I suppose this
 patch goes into a next builtin related series.  But here is a version
 with changes.
 Yea, I think you're right.  I think this is OK when the builtins are done.
 
 jeff


PING: Re: [patch] tag ../include/*

2014-10-13 Thread Aldy Hernandez

PING

On 10/07/14 09:37, Aldy Hernandez wrote:

Is there a reason we don't create etags for toplevel include files?  If
not, could I please apply this patch?

Thanks.
Aldy




Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Evgeny Stupachenko
On Fri, Oct 10, 2014 at 8:03 PM, Jeff Law l...@redhat.com wrote:
 On 10/10/14 01:42, Evgeny Stupachenko wrote:

 Hi,

 The patch enables EBX in RA for x86 32bits PIC mode.
 It was discussed here:
 https://gcc.gnu.org/ml/gcc-patches/2014-09/msg02513.html
 Now there is working version with good performance and stability level
 - it could be a solid first step of EBX enabling.

 Bootstrap and make check passed.
 There are several changes in -m32 make check.

 New pass:
 gcc.target/i386/pr57003.c - before patch there was not enough registers to
 PASS

 ?!?  That doesn't make a lot of sense.  More likely it was Uros's fix from
 yesterday to regcprop which causes this to pass again.
Correct. I've marked it by mistake. The test is flaky and the patch
does not change anything for the test.

 Is it possible you updated your sources between testing runs and as a result
 picked up Uros's fix?


 New fails:

 gcc.target/i386/pic-1.c (test for errors, line 12) - now there are no
 errors as we can do asm insertions with EBX

 I think you should remove the dg-error directive.  That turns this test into
 a simple confirmation that we can use %ebx in an asm even when generating
 PIC code.

 Can you add a PR markers to your changelog

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458

 Actually I think there is an additional test in 47602.  Can you please add
 it to the suite?  You'll also want to change the state of 47602 to
 RESOLVED/FIXED.







 gcc.target/i386/pr23098.c scan-assembler-not .LC[0-9] - potential
 performance opportunity using constant immediate

 If you're not going to fix it, then you should xfail it.

 gcc.target/i386/pr55458.c (test for errors, line 10) - now there are
 no errors as there enough registers

 Right.  Remove the dg-error and turn this into a test that effectively
 verifies that %ebx is no longer fixed when generating PIC code on i686.

 With those changes this is OK for the trunk.

 jeff



ChangeLog for testsuite:

2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

PR target/8340
PR middle-end/47602
PR rtl-optimization/55458
* gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
* gcc.target/i386/pr55458.c: Likewise.
* gcc.target/i386/pr47602.c: New.
* gcc.target/i386/pr23098.c: Move to XFAIL.


enabling_ebx_tests.patch
Description: Binary data


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [15/n] Optimize redundant checks

2014-10-13 Thread Ilya Enkovich
2014-10-10 20:56 GMT+04:00 Jeff Law l...@redhat.com:
 On 10/10/14 09:50, Ilya Enkovich wrote:

 Checks and intersection removal code was added as a simple pass
 catching trivial cases.  I'm sure there are optimizations having
 common elements with what checker optimizer does.  But initially we
 didn't want to adopt existing optimizers because GIMPLE representation
 of instrumentation was not stable and also we still don't know what
 are important targets for optimizations.

 Understood.


 The plan is to have stable version first.  After enabling we want to
 make performance analysis and determine which optimizations are most
 required (it may appear checks removal doesn't give any significant
 performance gain at all), determine which of current infrastructure
 may be re-used (if any) and implement proper checker optimization.

 Current optimizer is a simple code cleanup.  I do not think we should
 make any significant rework of it as a part of enabling.  If current
 approach seems to require significant changes to go to trunk then it
 should be probably delayed and go separately from instrumentation
 pass.

 Well, I think it should be trivial to handle the redundant check elimination
 in DOM.

 Most likely eliminate_redundant_computations needs some work to allow it to
 look inside those checks and get them recorded into its tables.  With that
 in place, DOM should optimize this stuff without further intervention.  It's
 probably less code than you've already written :-)

 The swapping variant feels like it should be simple to implement with the
 existing dominator walkers.  But I haven't thought nearly as much about that
 one.

 jeff

I'll look into DOM and a possibility to use it for checks removal.
But I give higher priority to builtins instrumentation and therefore
prefer to delay this one and return to it after builtins
instrumentation work or in case there is some spare time for it.  This
patch is not critical for checker functionality and may be excluded
from initial commit.

Thanks,
Ilya


Re: [PATCH 2/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Evgeny Stupachenko
-#define PIC_OFFSET_TABLE_REGNUM \
-  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC \
- || TARGET_PECOFF)) \
-   || !flag_pic ? INVALID_REGNUM \
-   : reload_completed ? REGNO (pic_offset_table_rtx) \
+#define PIC_OFFSET_TABLE_REGNUM \
+  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC \
+ || TARGET_PECOFF)) \
+   || !flag_pic ? INVALID_REGNUM \
+   : pic_offset_table_rtx ? INVALID_REGNUM \
: REAL_PIC_OFFSET_TABLE_REGNUM)

No negative conditions, please. Also, please follow established
multi-level condition format, please see e.g. HARD_REGNO_NREGS
definition in i386.h.
I don't see how we can avoid negative condition here. If we remove
not from !flag_pic we'll need to add not to TARGET_64BIT and
TARGET_PECOFF.

I've done it this way:
+#define PIC_OFFSET_TABLE_REGNUM                                 \
+  ((TARGET_64BIT && (ix86_cmodel == CM_SMALL_PIC                \
+                     || TARGET_PECOFF))                         \
+   || !flag_pic                                                 \
+   ? INVALID_REGNUM                                             \
+   : pic_offset_table_rtx                                       \
+     ? INVALID_REGNUM                                           \
+     : REAL_PIC_OFFSET_TABLE_REGNUM)

Is it ok?

On Fri, Oct 10, 2014 at 6:01 PM, Evgeny Stupachenko evstu...@gmail.com wrote:
 Updated ChangeLog:
 2014-10-10  Ilya Enkovich  ilya.enkov...@intel.com
 Vladimir Makarov  vmaka...@redhat.com

 * config/i386/i386.c (ix86_use_pseudo_pic_reg): New.
 (ix86_init_pic_reg): New.
 (ix86_select_alt_pic_regnum): Add check on pseudo register.
 (ix86_save_reg): Likewise.
 (ix86_expand_prologue): Remove PIC register initialization
 now performed in ix86_init_pic_reg.
 (ix86_output_function_epilogue): Add check on pseudo register.
 (set_pic_reg_ever_alive): New.
 (legitimize_pic_address): Replace df_set_regs_ever_live with new
 set_pic_reg_ever_alive.
 (legitimize_tls_address): Likewise.
 (ix86_pic_register_p): New check.
 (ix86_delegitimize_address): Add check on pseudo register.
 (ix86_expand_call): Insert move from pseudo PIC register to ABI
 defined REAL_PIC_OFFSET_TABLE_REGNUM.
 (TARGET_INIT_PIC_REG): New.
 (TARGET_USE_PSEUDO_PIC_REG): New.
 * config/i386/i386.h (PIC_OFFSET_TABLE_REGNUM): Return INVALID_REGNUM
 if pic_offset_table_rtx exists.

-  if (pic_offset_table_rtx)
+  if (pic_offset_table_rtx
+      && (!reload_completed || !ix86_use_pseudo_pic_reg ()))

Hm, can you please add a comment for this change?

 I've added the following comment to the patch:
 -in which case we return (%ecx - %ebx) + foo.  */
 +in which case we return (%ecx - %ebx) + foo.
 +
 +Note that when pseudo_pic_reg is used we can generate it only
 +before reload_completed.  */

 On Fri, Oct 10, 2014 at 4:36 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Fri, Oct 10, 2014 at 02:34:07PM +0200, Rainer Orth wrote:
 Uros Bizjak ubiz...@gmail.com writes:

  On Fri, Oct 10, 2014 at 9:43 AM, Evgeny Stupachenko evstu...@gmail.com 
  wrote:
  i386 specific part of the patch:
 
  2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
  Vladimir Makarov  vmaka...@redhat.com
  * gcc/config/i386/i386.c (ix86_use_pseudo_pic_reg): New.

 Evgeny: here and in your other submissions: drop the gcc prefix from the
 pathnames.  They are all relative to the directory the ChangeLog lives
 in.

 And add a blank line after the e-mail lines.

 Jakub


Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling

2014-10-13 Thread Bernd Schmidt

On 10/13/2014 12:33 PM, Ilya Verbin wrote:

On 13 Oct 12:19, Jakub Jelinek wrote:

But I'd like to understand why is this one needed.
Why should the compilers care?  Aggregates layout and alignment of
integral/floating types must match between host and offload compilers, sure,
but isn't that something streamed already in the LTO bytecode?
Or is LTO streamer not streaming some types like long_type_node?


It isn't, see the preload_common_nodes code. Also, the backend needs to 
choose the right Pmode (and in the case of ptx, emit a directive about 
address sizes).



Bernd



Re: [PATCH 3/5] IPA ICF pass

2014-10-13 Thread Martin Liška

On 10/11/2014 02:05 AM, Martin Liška wrote:

On 09/26/2014 09:46 PM, Jan Hubicka wrote:

Hi,
this is on ipa-icf-gimple.c

@@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void)
{
  if (verify_edge_corresponds_to_fndecl (e, decl))
{
- error (edge points to wrong declaration:);
- debug_tree (e-callee-decl);
- fprintf (stderr, Instead of:);
- debug_tree (decl);
- error_found = true;
+ /* The edge can be redirected in WPA by IPA 
ICF.
+Following check really ensures that it's
+not the case.  */
+
+ cgraph_node *current_node = cgraph_node::get 
(decl);
+ if (!current_node || 
!current_node-icf_merged)

I would move this into verify_edge_corresponds_to_fndecl.

diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
new file mode 100644
index 000..7031eaa
--- /dev/null
+++ b/gcc/ipa-icf-gimple.c
@@ -0,0 +1,384 @@
+/* Interprocedural Identical Code Folding pass
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska 
mli...@suse.cz
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */

Please add toplevel comment about what the code does and how to use it.

+namespace ipa_icf {
+
+/* Basic block equivalence comparison function that returns true if
+   basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond.  */
... to each other?
I would add a short comment that, as the comparison goes, you build a vocabulary
of equivalences of variables/ssanames etc.
So people reading the code do not get lost at the very beginning.

+
+bool
+func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
+{
+  unsigned i;
+  gimple_stmt_iterator gsi1, gsi2;
+  gimple s1, s2;
+
+  if (bb1->nondbg_stmt_count != bb2->nondbg_stmt_count
+  || bb1->edge_count != bb2->edge_count)
+return RETURN_FALSE ();

The UPPERCASE looks ugly.  I see that RETURN_FALSE is a wrapper for 
return_false_with_msg
that outputs line and file information.

I would make it lowercase even if it is macro. You may consider using
CXX_MEM_STAT_INFO style default argument to avoid function macro completely.
Probably not big win given that it won't save you from preprocessor mess.
+
+  gsi1 = gsi_start_bb (bb1->bb);
+  gsi2 = gsi_start_bb (bb2->bb);
+
+  for (i = 0; i < bb1->nondbg_stmt_count; i++)
+{
+  if (is_gimple_debug (gsi_stmt (gsi1)))
+   gsi_next_nondebug (&gsi1);
+
+  if (is_gimple_debug (gsi_stmt (gsi2)))
+   gsi_next_nondebug (&gsi2);
+
+  s1 = gsi_stmt (gsi1);
+  s2 = gsi_stmt (gsi2);
+
+  if (gimple_code (s1) != gimple_code (s2))
+   return RETURN_FALSE_WITH_MSG (gimple codes are different);

I think you need to compare EH here.  Consider case where one unit
is compiled with -fno-exception and thus all EH regions are removed,
while other function has EH regions in it.  Those are not equivalent.

EH region is obtained by lookup_stmt_eh and then you need to compare
them for match as you do with gimple_resx_region.

+  t1 = gimple_call_fndecl (s1);
+  t2 = gimple_call_fndecl (s2);
+
+  /* Function pointer variables are not supported yet.  */

They seems to be, compare_operand seems just right.

+
+/* Verifies for given GIMPLEs S1 and S2 that
+   label statements are semantically equivalent.  */
+
+bool
+func_checker::compare_gimple_label (gimple g1, gimple g2)
+{
+  if (m_ignore_labels)
+return true;
+
+  tree t1 = gimple_label_label (g1);
+  tree t2 = gimple_label_label (g2);
+
+  return compare_tree_ssa_label (t1, t2);
+}

I would expect the main BB loop to record BB in which label belongs to
and the BB association being checked here.
Otherwise I do not see how switch statements are compared to not have
different permutations of targets. Also note that one BB may have
multiple labels in them and they are equivalent.

Also I would punt on occurrence of FORCED_LABEL. Those are tricky as they
may be passed around and compared for address and no one really defines
what should happen.  Better to avoid those.


Hi.

I will remove this support in the pass.



+
+/* Verifies for given 

[jit] Work around a bug in dejagnu.exp's host_execute

2014-10-13 Thread David Malcolm
Committed to branch dmalcolm/jit:

dejagnu.exp's host_execute has a bug, which (for me) reliably breaks
the jit testsuite when running make check (though not when
running make check-jit by itself, and appears to be unpleasantly
timing-related).

See the discussion at:
  http://lists.gnu.org/archive/html/dejagnu/2014-10/msg0.html

For now, work around it by adding a patched copy of host_execute to
jit.exp, deleting the bogus line.

gcc/testsuite/ChangeLog.jit:
* jit.dg/jit.exp (fixed_host_execute): New function, taken from
host_execute in DejaGnu's dejagnu.exp, with one line removed.
(jit-dg-test): Use fixed_host_execute, rathern than host_execute.
---
 gcc/testsuite/ChangeLog.jit  |   6 ++
 gcc/testsuite/jit.dg/jit.exp | 137 ++-
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/ChangeLog.jit b/gcc/testsuite/ChangeLog.jit
index 7703212..97d0f8b 100644
--- a/gcc/testsuite/ChangeLog.jit
+++ b/gcc/testsuite/ChangeLog.jit
@@ -1,5 +1,11 @@
 2014-10-13  David Malcolm  dmalc...@redhat.com
 
+   * jit.dg/jit.exp (fixed_host_execute): New function, taken from
+   host_execute in DejaGnu's dejagnu.exp, with one line removed.
+   (jit-dg-test): Use fixed_host_execute, rathern than host_execute.
+
+2014-10-13  David Malcolm  dmalc...@redhat.com
+
* jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine
note from dejagnu.h to new function dejagnu_note so that we can
make note be threadsafe.
diff --git a/gcc/testsuite/jit.dg/jit.exp b/gcc/testsuite/jit.dg/jit.exp
index 4672063..76a1d9d 100644
--- a/gcc/testsuite/jit.dg/jit.exp
+++ b/gcc/testsuite/jit.dg/jit.exp
@@ -23,6 +23,135 @@ load_lib target-libpath.exp
 load_lib gcc.exp
 load_lib dejagnu.exp
 
+# This is host_execute from dejagnu.exp commit
+#   126a089777158a7891ff975473939f08c0e31a1c
+# with the following patch applied, and renaming to fixed_host_execute.
+# See the discussion at
+#  http://lists.gnu.org/archive/html/dejagnu/2014-10/msg0.html
+#
+#  --- /usr/share/dejagnu/dejagnu.exp.old  2014-10-08 13:38:57.274068541 -0400
+#  +++ /usr/share/dejagnu/dejagnu.exp  2014-10-10 12:27:51.113813659 -0400
+#  @@ -113,8 +113,6 @@ proc host_execute {args} {
+#   set timetol 0
+#   set arguments 
+#   
+#  -expect_before buffer_full { perror Buffer full }
+#  -
+#   if { [llength $args] == 0} {
+#  set executable $args
+#   } else {
+
+
+# Execute the executable file, and anaylyse the output for the
+# test state keywords.
+#Returns:
+#  A  (empty) string if everything worked, or an error message
+#  if there was a problem.
+#
+proc fixed_host_execute {args} {
+global text
+global spawn_id
+
+set timeoutmsg Timed out: Never got started, 
+set timeout 100
+set file all
+set timetol 0
+set arguments 
+
+if { [llength $args] == 0} {
+   set executable $args
+} else {
+   set executable [string trimleft [lindex [split $args  ] 0] \{]
+   set params [string trimleft [lindex [split $args  ] 1] \{]
+   set params [string trimright $params \}]
+}
+
+verbose The executable is $executable 2
+if {![file exists ${executable}]} {
+   perror The executable, \$executable\ is missing 0
+   return No source file found
+}
+
+# spawn the executable and look for the DejaGnu output messages from the
+# test case.
+# spawn -noecho -open [open |./${executable} r]
+spawn -noecho ./${executable} ${params}
+expect_after full_buffer { error got full_buffer }
+
+set prefix \[^\r\n\]*
+expect {
+   -re ^$prefix\[0-9\]\[0-9\]:..:..:${text}*\r\n {
+   regsub \[\n\r\t\]*NOTE: $text\r\n $expect_out(0,string)  output
+   verbose $output 3
+   set timetol 0
+   exp_continue
+   }
+   -re ^$prefix\tNOTE:${text}* {
+   regsub \[\n\r\t\]*NOTE: $text\r\n $expect_out(0,string)  output
+   set output [string range $output 6 end]
+   verbose $output 2
+   set timetol 0
+   exp_continue
+   }
+   -re ^$prefix\tPASSED:${text}* {
+   regsub \[\n\r\t\]*PASSED: $text\r\n $expect_out(0,string)  
output
+   set output [string range $output 8 end]
+   pass $output
+   set timetol 0
+   exp_continue
+   }
+   -re ^$prefix\tFAILED:${text}* {
+   regsub \[\n\r\t\]*FAILED: $text\r\n $expect_out(0,string)  
output
+   set output [string range $output 8 end]
+   fail $output
+   set timetol 0
+   exp_continue
+   }
+   -re ^$prefix\tUNTESTED:${text}* {
+   regsub \[\n\r\t\]*TESTED: $text\r\n $expect_out(0,string)  
output
+   set output [string range $output 8 end]
+   untested $output
+   set timetol 0
+   exp_continue
+   }
+   -re ^$prefix\tUNRESOLVED:${text}* {
+   regsub \[\n\r\t\]*UNRESOLVED: 

Re: [PATCH 3/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Evgeny Stupachenko
Patch updated with the comment:

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2a64d2d..5fd6a82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12455,9 +12455,18 @@ ix86_address_cost (rtx x, enum machine_mode,
addr_space_t, bool)
  || REGNO (parts.index) = FIRST_PSEUDO_REGISTER)))
 cost++;

+  /* When address base or index is pic_offset_table_rtx we don't increase
+ address cost.  When a memop with pic_offset_table_rtx is not invariant
+ itself it most likely means that base or index is not invariant.
+ Therefore only pic_offset_table_rtx could be hoisted out, which is not
+ profitable for x86.  */
   if (parts.base
+   (!pic_offset_table_rtx
+ || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
(!REG_P (parts.base) || REGNO (parts.base) = FIRST_PSEUDO_REGISTER)
parts.index
+   (!pic_offset_table_rtx
+ || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
(!REG_P (parts.index) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER)
parts.base != parts.index)
 cost++;

On Fri, Oct 10, 2014 at 3:04 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Fri, Oct 10, 2014 at 9:58 AM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:
 the patch improves performance when previous are applied.
 It makes RTL loop invariant behavior for GOT loads same as it was
 before the 2 previous patches.

 The patch fixes x86 address cost so that cost for addresses with GOT
 register becomes less, how it was before enabling EBX.

 In x86_address_cost the result of “REGNO (parts.base) =
 FIRST_PSEUDO_REGISTER” for hard ebx was always false. The patch makes
 condition result
 the same when parts.base is GOT register (the same for parts.index).

 2014-10-08  Evgeny Stupachenko  evstu...@gmail.com
 * gcc/config/i386/i386.c (ix86_address_cost): Lower cost for
 when address contains GOT register.

 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
 index b43e870..9d8cfd1 100644
 --- a/gcc/config/i386/i386.c
 +++ b/gcc/config/i386/i386.c
 @@ -12497,8 +12497,12 @@ ix86_address_cost (rtx x, enum machine_mode,
 addr_space_t, bool)
  cost++;

 Please add a short comment here, explaining the reason for new condition.

if (parts.base
 +   (!pic_offset_table_rtx
 + || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
 (!REG_P (parts.base) || REGNO (parts.base) = 
 FIRST_PSEUDO_REGISTER)
 parts.index
 +   (!pic_offset_table_rtx
 + || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
 (!REG_P (parts.index) || REGNO (parts.index) = 
 FIRST_PSEUDO_REGISTER)
 parts.base != parts.index)
  cost++;

 Otherwise LGTM, but please repost the patch with a comment.

 Uros.


Re: [PATCH 2/3] libstdc++: Add put_time support.

2014-10-13 Thread Jonathan Wakely

On 13/10/14 13:08 +0100, Jonathan Wakely wrote:

On 15/04/14 23:20 +0200, Rüdiger Sonderfeld wrote:

Described in [ext.manip].

* libstdc++-v3/include/std/iomanip (_Put_time): New struct.
(put_time): New manipulator.
(operator): New overloaded function.
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/1.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/1.cc:
* libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/2.cc:
New file.


The 27_io/manipulators/extended/put_time/char/2.cc and
27_io/manipulators/extended/put_time/wchar_t/2.cc tests fail for me.

i2.exe: 
/home/jwakely/src/gcc/gcc/libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:41:
 void test01(): Assertion `oss.str() == Son 1971' failed.
FAIL: 27_io/manipulators/extended/put_time/char/2.cc execution test


With my de_DE.utf8 locale the output is "So 1971", not "Son 1971".

$ LANG=de_DE.utf8 date +%a 
Mo




Re: [PATCH] move many gc hashtab to hash_table

2014-10-13 Thread H.J. Lu
On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com wrote:
 On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote:
 On 10/06/14 21:24, tsaund...@mozilla.com wrote:
 From: Trevor Saunders tsaund...@mozilla.com
 
 Hi,
 
 This changes almost all of the ggc htab that don't use the if_marked option 
 to
 be hash_tables.  I added a for_user gty attribute so that types could be 
 used
 from user marking routines without either using the mangled names or being
 referenced in a template argument which was previously the only way to get
 gengtype to emit user callable marking routines.
 
 bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc for
 ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and
 x86_64-apple-darwin.  I haven't heard back yet on my request for a cfarm
 account so I haven't tried bootstrapping on other archs, but more testing is
 most welcome. Ok?
 
 Trev
 
 
 gcc/
 
 * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h,
  config/darwin.c, config/m32c/m32c.c, config/mep/mep.c,
  config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c,
  function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c,
  output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c,
  tree-ssa.c, varasm.c: Use hash-table instead of hashtab.
  * doc/gty.texi (for_user): Document new option.
  * gengtype.c (create_user_defined_type): Don't try to get a struct for
  char.
  (walk_type): Don't error out on for_user option.
  (write_func_for_structure): Emit user marking routines if requested by
  for_user option.
  (write_local_func_for_structure): Likewise.
  (main): Mark types with for_user option as used.
  * ggc.h (gt_pch_nx): Add overload for unsigned int.
  * hash-map.h (hash_map::hash_entry::pch_nx_helper): Add overloads.
  * hash-table.h (ggc_hasher): New struct.
  (hash_table::create_ggc): New function.
  (gt_pch_nx): New overload for hash_table.
 
 java/
 
  * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use
  hash_table instead of hashtab.
 
 objc/
 
  * objc-act.c: use hash_table instead of hashtab.
 
 cp/
 
  * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c,
  pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of
  hashtab.
 
 fortran/
 
  * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab.
 
 c-family/
 
  * c-common.c: Use hash_table instead of hashtab.
 Whee, meaning we no longer have to pass through void * pointers and the
 visually ugly casting that requires in the callbacks?!?  Yea!

 yeah, you can even use an iterator now if you like

 -++*((unsigned HOST_WIDE_INT *) data);
 +++* data;
 I think coding standards require no space here.  Obviously a nit.

 oops, definitely should be fixed :)

 This is obviously a fairly mechanical change.  I did some spot checking and
 it looks good.  I don't expect any fallout.

 Well, it's large enough I expect *something* but I doubt it'll be a big
 deal.

 Good for the trunk.

 thanks for the review, I know it was rather big.

This breaks bootstrap on Linux/x86:

https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html

This patch fixes it.

-- 
H.J.
--
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index 6b8f706..40d32d8 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -3744,7 +3744,7 @@ mangle_conv_op_name_for_type (const tree type)
   char buffer[64];

/* Create a unique name corresponding to TYPE.  */
-  sprintf (buffer, operator %lu, conv_type_names-elements ());
+  sprintf (buffer, operator %lu, (long) conv_type_names-elements ());
   identifier = get_identifier (buffer);
   *slot = identifier;

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 47b5d93..390e63e 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -22324,12 +22324,12 @@ void
 print_template_statistics (void)
 {
   fprintf (stderr, decl_specializations: size %ld, %ld elements, 
-   %f collisions\n, decl_specializations-size (),
-   decl_specializations-elements (),
+   %f collisions\n, (long) decl_specializations-size (),
+   (long) decl_specializations-elements (),
decl_specializations-collisions ());
   fprintf (stderr, type_specializations: size %ld, %ld elements, 
-   %f collisions\n, type_specializations-size (),
-   type_specializations-elements (),
+   %f collisions\n, (long) type_specializations-size (),
+   (long) type_specializations-elements (),
type_specializations-collisions ());
 }


Re: New rematerialization sub-pass in LRA

2014-10-13 Thread Evgeny Stupachenko
I don't see significant performance changes from the patch (with and
without patch enabling ebx) on x86 in 32bits mode.

Thanks,
Evgeny

On Sat, Oct 11, 2014 at 2:31 AM, Sebastian Pop seb...@gmail.com wrote:
 Vladimir Makarov wrote:
   I've tested and benchmarked the sub-pass on x86-64 and ARM.  The
 sub-pass permits to generate a smaller code in average on both
 architecture (although improvement no-significant), adds  0.4%
 additional compilation time in -O2 mode of release GCC (according user
 time of compilation of 500K lines fortran program and valgrind lackey #
 insns in combine.i compilation) and about 0.7% in -O0 mode.  As the
 performance result, the best I found is 1% SPECFP2000 improvement on
 ARM Exynos 5410 (973 vs 963) but for Intel Haswell the performance
 results are practically the same (Haswell has a very good
 sophisticated memory sub-system).

 On aarch64 I have seen some minor perf improvements to libpng compress and
 decompress.  The patch does not change the perf for all other benchmarks that 
 I
 have tested.

 Thanks,
 Sebastian


Re: [PATCH] move many gc hashtab to hash_table

2014-10-13 Thread H.J. Lu
On Mon, Oct 13, 2014 at 8:33 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com 
 wrote:
 On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote:
 On 10/06/14 21:24, tsaund...@mozilla.com wrote:
 From: Trevor Saunders tsaund...@mozilla.com
 
 Hi,
 
 This changes almost all of the ggc htab that don't use the if_marked 
 option to
 be hash_tables.  I added a for_user gty attribute so that types could be 
 used
 from user marking routines without either using the mangled names or being
 referenced in a template argument which was previously the only way to get
 gengtype to emit user callable marking routines.
 
 bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc for
 ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and
 x86_64-apple-darwin.  I haven't heard back yet on my request for a cfarm
 account so I haven't tried bootstrapping on other archs, but more testing 
 is
 most welcome. Ok?
 
 Trev
 
 
 gcc/
 
 * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h,
  config/darwin.c, config/m32c/m32c.c, config/mep/mep.c,
  config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c,
  function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c,
  output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c,
  tree-ssa.c, varasm.c: Use hash-table instead of hashtab.
  * doc/gty.texi (for_user): Document new option.
  * gengtype.c (create_user_defined_type): Don't try to get a struct for
  char.
  (walk_type): Don't error out on for_user option.
  (write_func_for_structure): Emit user marking routines if requested by
  for_user option.
  (write_local_func_for_structure): Likewise.
  (main): Mark types with for_user option as used.
  * ggc.h (gt_pch_nx): Add overload for unsigned int.
  * hash-map.h (hash_map::hash_entry::pch_nx_helper): AddOverloads.
  * hash-table.h (ggc_hasher): New struct.
  (hash_table::create_ggc): New function.
  (gt_pch_nx): New overload for hash_table.
 
 java/
 
  * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use
  hash_table instead of hashtab.
 
 objc/
 
  * objc-act.c: use hash_table instead of hashtab.
 
 cp/
 
  * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c,
  pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of
  hashtab.
 
 fortran/
 
  * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab.
 
 c-family/
 
  * c-common.c: Use hash_table instead of hashtab.
 Whee, meaning we no longer have to pass through void * pointers and the
 visually ugly casting that requires in the callbacks?!?  Yea!

 yeah, you can even use an iterator now if you like

 -++*((unsigned HOST_WIDE_INT *) data);
 +++* data;
 I think coding standards require no space here.  Obviously a nit.

 oops, definitely should be fixed :)

 This is obviously a fairly mechanical change.  I did some spot checking and
 it looks good.  I don't expect any fallout.

 Well, its large enough I expect *something* but I doubt it'll be a big
 deal.

 Good for the trunk.

 thanks for the review, I know it was rather big.

 This breaks bootstrap on Linux/x86:

 https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html

 This patch fixes it.

This is what I checked in.


-- 
H.J.
---
Index: ChangeLog
===
--- ChangeLog (revision 216150)
+++ ChangeLog (working copy)
@@ -1,3 +1,9 @@
+2014-10-13  H.J. Lu  hongjiu...@intel.com
+
+ * mangle.c (mangle_conv_op_name_for_type): Cast elements to
+ unsigned long.
+ (print_template_statistics): Cast size and elements to long.
+
 2014-10-12  Trevor Saunders  tsaund...@mozilla.com

  * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c,
Index: mangle.c
===
--- mangle.c (revision 216150)
+++ mangle.c (working copy)
@@ -3744,7 +3744,8 @@ mangle_conv_op_name_for_type (const tree
   char buffer[64];

/* Create a unique name corresponding to TYPE.  */
-  sprintf (buffer, operator %lu, conv_type_names-elements ());
+  sprintf (buffer, operator %lu,
+   (unsigned long) conv_type_names-elements ());
   identifier = get_identifier (buffer);
   *slot = identifier;

Index: pt.c
===
--- pt.c (revision 216150)
+++ pt.c (working copy)
@@ -22324,12 +22324,12 @@ void
 print_template_statistics (void)
 {
   fprintf (stderr, decl_specializations: size %ld, %ld elements, 
-   %f collisions\n, decl_specializations-size (),
-   decl_specializations-elements (),
+   %f collisions\n, (long) decl_specializations-size (),
+   (long) decl_specializations-elements (),
decl_specializations-collisions ());
   fprintf (stderr, type_specializations: size %ld, %ld elements, 
-   %f collisions\n, type_specializations-size (),
-   type_specializations-elements 

Re: [PATCH 0/2] xtensa: fix floating-point parts of machine description

2014-10-13 Thread augustine.sterl...@gmail.com
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote:
 Hi Sterling,

 this series fixes two bugs in xtensa.md:

Hi Max, thanks for this. I don't see a patch though.


Re: [PATCH 2/2] xtensa: use pre- and postincrement FP load/store when available

2014-10-13 Thread augustine.sterl...@gmail.com
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote:
 2014-10-10  Max Filippov  jcmvb...@gmail.com

 gcc/
 * config/xtensa/xtensa.h (TARGET_HARD_FLOAT_POSTINC): new macro.
 * config/xtensa/xtensa.md (*lsiu, *ssiu): add dependency on
 !TARGET_HARD_FLOAT_POSTINC.
 (*lsip, *ssip): new instructions.

Approved. Do you have write privileges?


Re: [PATCH 1/2] xtensa: drop unimplemented floating point operations

2014-10-13 Thread augustine.sterl...@gmail.com
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote:
 xtensa ISA never implemented FP division, reciprocal, square root and
 inverse square root as single opcode. Remove patterns that can emit
 them.

 2014-10-09  Max Filippov  jcmvb...@gmail.com

 gcc/
 * config/xtensa/xtensa.md (divsf3, *recipsf2, sqrtsf2, *rsqrtsf2):
 remove.

Approved.


Re: [PATCH] Add zero-overhead looping for xtensa backend

2014-10-13 Thread augustine.sterl...@gmail.com
On Fri, Oct 10, 2014 at 6:59 AM, Felix Yang fei.yang0...@gmail.com wrote:
 Hi Sterling,

 I made some improvement to the patch. Two changes:
 1. TARGET_LOOPS is now used as a condition of the doloop related
 patterns, which is more elegant.

Fine.

 2. As the trip count register of the zero-cost loop may be
 potentially spilled, we need to change the patterns in order to handle
 this issue.

Actually, for xtensa you don't. The trip count is copied into LCOUNT
at the execution of the loop instruction, and therefore a spill or
whatever doesn't matter--it won't affect the result. So as long as you
have the trip count at the start of the loop, you are fine.

This does bring up an issue of whether or not the trip count can be
modified during the loop. (note that this is different than early
exit.) If it can, you can't use a zero-overhead loop. Does your patch
address this case.

The solution is similar to that adopted by the c6x backend.
 Just turn the zero-cost loop into a regular loop when that happens
 when reload is completed.
 Attached please find version 4 of the patch. Make check regression
 tested with xtensa-elf-gcc/simulator.
 OK for trunk?


Re: [PATCH 2/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Uros Bizjak
On Mon, Oct 13, 2014 at 5:01 PM, Evgeny Stupachenko evstu...@gmail.com wrote:
-#define PIC_OFFSET_TABLE_REGNUM \
-  ((TARGET_64BIT  (ix86_cmodel == CM_SMALL_PIC \
- || TARGET_PECOFF)) \
-   || !flag_pic ? INVALID_REGNUM \
-   : reload_completed ? REGNO (pic_offset_table_rtx) \
+#define PIC_OFFSET_TABLE_REGNUM \
+  ((TARGET_64BIT  (ix86_cmodel == CM_SMALL_PIC \
+ || TARGET_PECOFF)) \
+   || !flag_pic ? INVALID_REGNUM \
+   : pic_offset_table_rtx ? INVALID_REGNUM \
: REAL_PIC_OFFSET_TABLE_REGNUM)

No negative conditions, please. Also, please follow established
multi-level condition format, please see e.g. HARD_REGNO_NREGS
definition in i386.h.
 I don't see how we can avoid negative condition here. If we remove
 not from !flag_pic we'll need to add not to TARGET_64BIT and
 TARGET_PECOFF.

 I've done it this way:
 +#define PIC_OFFSET_TABLE_REGNUM
  \
 +  ((TARGET_64BIT  (ix86_cmodel == CM_SMALL_PIC   \
 + || TARGET_PECOFF))
  \
 +   || !flag_pic
  \
 +   ? INVALID_REGNUM\
 +   : pic_offset_table_rtx  \
 + ? INVALID_REGNUM  \
 + : REAL_PIC_OFFSET_TABLE_REGNUM)

 Is it ok?

Oh, indeed. I missed the logical or. Maybe put the first condition
into parenthesis, to avoid confusion.

OK in any case.

Thanks,
Uros.


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [15/n] Optimize redundant checks

2014-10-13 Thread Jeff Law

On 10/13/14 08:58, Ilya Enkovich wrote:


I'll look into DOM and a possibility to use it for checks removal.
But I give higher priority to builtins instrumentation and therefore
prefer to delay this one and return to it after builtins
instrumentation work or in case there is some spare time for it.  This
patch is not critical for checker functionality and may be excluded
from initial commit.

OK.

When you're ready to look at DOM, don't hesitate to contact me.  I 
probably know that code better than anyone.


jeff


Re: [PATCH 3/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Uros Bizjak
On Mon, Oct 13, 2014 at 5:17 PM, Evgeny Stupachenko evstu...@gmail.com wrote:
 Patch updated with the comment:

 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
 index 2a64d2d..5fd6a82 100644
 --- a/gcc/config/i386/i386.c
 +++ b/gcc/config/i386/i386.c
 @@ -12455,9 +12455,18 @@ ix86_address_cost (rtx x, enum machine_mode,
 addr_space_t, bool)
   || REGNO (parts.index) = FIRST_PSEUDO_REGISTER)))
  cost++;

 +  /* When address base or index is pic_offset_table_rtx we don't increase
 + address cost.  When a memop with pic_offset_table_rtx is not invariant
 + itself it most likely means that base or index is not invariant.
 + Therefore only pic_offset_table_rtx could be hoisted out, which is not
 + profitable for x86.  */
if (parts.base
 +   (!pic_offset_table_rtx
 + || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
 (!REG_P (parts.base) || REGNO (parts.base) = FIRST_PSEUDO_REGISTER)
 parts.index
 +   (!pic_offset_table_rtx
 + || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
 (!REG_P (parts.index) || REGNO (parts.index) = 
 FIRST_PSEUDO_REGISTER)
 parts.base != parts.index)
  cost++;

LGTM.

OK.

Thanks,
Uros.


Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Uros Bizjak
On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com wrote:

 ChangeLog for testsuite:

 2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458
 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
 * gcc.target/i386/pr55458.c: Likewise.
 * gcc.target/i386/pr47602.c: New.
 * gcc.target/i386/pr23098.c: Move to XFAIL.

Reversed patch was attached. Please repost.

Uros.


RE: New rematerialization sub-pass in LRA

2014-10-13 Thread Wilco Dijkstra
   Here is a new rematerialization sub-pass of LRA.
 
   I've tested and benchmarked the sub-pass on x86-64 and ARM.  The
 sub-pass permits to generate a smaller code in average on both
 architecture (although improvement no-significant), adds  0.4%
 additional compilation time in -O2 mode of release GCC (according user
 time of compilation of 500K lines fortran program and valgrind lackey #
 insns in combine.i compilation) and about 0.7% in -O0 mode.  As the
 performance result, the best I found is 1% SPECFP2000 improvement on
 ARM Exynos 5410 (973 vs 963) but for Intel Haswell the performance
 results are practically the same (Haswell has a very good
 sophisticated memory sub-system).

I ran SPEC2k on AArch64, and EON fails to run correctly with -fno-caller-saves
-mcpu=cortex-a57 -fomit-frame-pointer -Ofast. I'm not sure whether this is
AArch64 specific, but previously non-optimal register allocation choices
triggered a latent bug in ree (it's unclear why GCC still allocates FP
registers in high-pressure integer code, as I set the costs for int-FP moves
high).

On SPECINT2k performance is ~0.5% worse (5.5% regression on perlbmk), and 
SPECFP is ~0.2% faster.

Generally I think it is good to have a specific pass for rematerialization.
However should this not also affect the costs of instructions that can be 
cheaply rematerialized? Similarly for the choice whether to caller save or 
spill 
(today the caller-save code doesn't care at all about rematerialization, so it 
aggressively caller-saves values which could be rematerialized - see eg. 
https://gcc.gnu.org/ml/gcc/2014-09/msg00071.html).

Also I am confused by the claim memory reads are not profitable to 
rematerialize. 
Surely rematerializing a memory read from const-data or literal pool is cheaper
than spilling as you avoid a store to the stack?

Wilco


 




Re: PING: Re: [patch] tag ../include/*

2014-10-13 Thread Jeff Law

On 10/13/14 08:53, Aldy Hernandez wrote:

PING

On 10/07/14 09:37, Aldy Hernandez wrote:

Is there a reason we don't create etags for toplevel include files?  If
not, could I please apply this patch?

ENOPATCH

jeff


Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Jeff Law

On 10/13/14 08:53, Evgeny Stupachenko wrote:

ChangeLog for testsuite:

2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458
 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
 * gcc.target/i386/pr55458.c: Likewise.
 * gcc.target/i386/pr47602.c: New.
 * gcc.target/i386/pr23098.c: Move to XFAIL.


Looks like you goof'd the patch slightly (reversed).

It's trivial enough that I can see what the correctly ordered patch is 
doing.


OK for the trunk at the same time the rest of the bits go in.

jeff



Re: [PATCH] Add zero-overhead looping for xtensa backend

2014-10-13 Thread Felix Yang
Thanks for the comments.

The patch checked the usage of the trip count register, making sure
that it is not used in the loop body other than the doloop_end or
lives past the doloop_end instruction, as the following code snippet
shows:

+  /* Scan all the blocks to make sure they don't use iter_reg.  */
+  if (loop-iter_reg_used || loop-iter_reg_used_outside)
+{
+  if (dump_file)
+fprintf (dump_file, ;; loop %d uses iterator\n,
+ loop-loop_no);
+  return false;
+}

For the spill issue, I think we need to handle it. The reason is
that currently we are not telling GCC about the existence of the
LCOUNT register. Instead, we keep the trip count in a general register
and it's possible that this register can be spilled when register
pressure is high.
It's a good idea to post another patch to describe the LCOUNT
register in GCC in order to free this general register. But I want
this patch applied as a first step, OK?

Cheers,
Felix


On Tue, Oct 14, 2014 at 12:09 AM, augustine.sterl...@gmail.com
augustine.sterl...@gmail.com wrote:
 On Fri, Oct 10, 2014 at 6:59 AM, Felix Yang fei.yang0...@gmail.com wrote:
 Hi Sterling,

 I made some improvement to the patch. Two changes:
 1. TARGET_LOOPS is now used as a condition of the doloop related
 patterns, which is more elegant.

 Fine.

 2. As the trip count register of the zero-cost loop maybe
 potentially spilled, we need to change the patterns in order to handle
 this issue.

 Actually, for xtensa you don't. The trip count is copied into LCOUNT
 at the execution of the loop instruction, and therefore a spill or
 whatever doesn't matter--it won't affect the result. So as long as you
 have the trip count at the start of the loop, you are fine.

 This does bring up an issue of whether or not the trip count can be
 modified during the loop. (note that this is different than early
 exit.) If it can, you can't use a zero-overhead loop. Does your patch
 address this case.

 The solution is similar to that adapted by c6x backend.
 Just turn the zero-cost loop into a regular loop when that happens
 when reload is completed.
 Attached please find version 4 of the patch. Make check regression
 tested with xtensa-elf-gcc/simulator.
 OK for trunk?


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [12/n] Optimize string functions

2014-10-13 Thread Jeff Law

On 10/13/14 08:51, Ilya Enkovich wrote:

I had assumed the gimple_call_set_fndecl was a nop if we didn't
change the fndecl.  Is that not the case?


Right.  But (!fndecl_nochk) doesn't mean we didn't change fndecl because there 
is another set to fndecl above.

Thanks for clarifying.

This is OK when the builtins are done.

jeff



Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Evgeny Stupachenko
Reattached.

On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:

 ChangeLog for testsuite:

 2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458
 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
 * gcc.target/i386/pr55458.c: Likewise.
 * gcc.target/i386/pr47602.c: New.
 * gcc.target/i386/pr23098.c: Move to XFAIL.

 Reversed patch was attached. Please repost.

 Uros.


enabling_ebx_tests.patch
Description: Binary data


Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread Uros Bizjak
On Mon, Oct 13, 2014 at 6:32 PM, Evgeny Stupachenko evstu...@gmail.com wrote:
 Reattached.

 On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:

 ChangeLog for testsuite:

 2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458
 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
 * gcc.target/i386/pr55458.c: Likewise.
 * gcc.target/i386/pr47602.c: New.
 * gcc.target/i386/pr23098.c: Move to XFAIL.

 Reversed patch was attached. Please repost.

OK.

Thanks,
Uros.


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [8/n] Remove useless builtin calls

2014-10-13 Thread Jeff Law

On 10/13/14 08:07, Ilya Enkovich wrote:

Tests instrumentation are still needed.  With some basic tests and
instrumentation this will be OK.

I hate to be harping tests, but few developers are going to be
familiar with the MPX and related infrastructure and those tests are
critical to helping them know when they break something.

Similarly if the plan is to iterate on improving things, then those
basic functionality tests will ultimately save time as you can smoke
test before running larger benchmarks.


jeff


Here is a version with tests added.

Thanks,
Ilya
--
gcc/

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp.c (chkp_remove_useless_builtins): New.
(chkp_execute): Remove useless calls to Pointer Bounds
Checker builtins.

gcc/testsuite

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* gcc.target/i386/chkp-builtins-1.c: New.
* gcc.target/i386/chkp-builtins-2.c: New.
* gcc.target/i386/chkp-builtins-3.c: New.
* gcc.target/i386/chkp-builtins-4.c: New.

OK.
Jeff



Re: [PATCH, Pointer Bounds Checker 14/x] Passes [9/n] Static constructors

2014-10-13 Thread Jeff Law

On 10/13/14 06:41, Ilya Enkovich wrote:

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New.
(chkp_ctor_stmt_list): New.
(chkp_register_var_initializer): New.
(chkp_add_modification_to_stmt_list): New.
(chkp_output_static_bounds): New.
(chkp_finish_file): New.
(chkp_instrument_function): Remove useless statements
from static bounds constructors.
* tree-chkp.h (chkp_register_var_initializer): New.
(chkp_finish_file): New.
* doc/invoke.texi (chkp-max-ctor-size): New.
* params.def (PARAM_CHKP_MAX_CTOR_SIZE): New.

OK.
jeff



Re: [PATCH, Pointer Bounds Checker 14/x] Passes [10/n] Stores handler

2014-10-13 Thread Jeff Law

On 10/13/14 05:23, Ilya Enkovich wrote:

2014-10-09 22:51 GMT+04:00 Jeff Law l...@redhat.com:

On 10/08/14 13:12, Ilya Enkovich wrote:


Hi,

This patch adds an assignment processing function which is used by the inliner
for newly generated stores.

Thanks,
Ilya
--
2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com

 * tree-chkp.c (chkp_copy_bounds_for_assign): New.
 * tree-chkp.h (chkp_copy_bounds_for_assign): New.


This probably should have been part of the inliner submission since that's
the only place it's used and one needs the inliner context to know how this
function is going to be used.

Presumably the reason it's not in tree-inline and static is you want to
utilize chkp_walk_pointer_assignments?

The code is fine, just want to make sure it goes into a logical place.

Jeff




I have to export either chkp_copy_bounds_for_assign or
chkp_walk_pointer_assignments with chkp_copy_bounds_for_elem.  Not much
difference but I'd prefer to keep all memrefs processing codes in
tree-chkp.c.

OK.
jeff



[jit] Fix a comment in configure.ac

2014-10-13 Thread David Malcolm
Committed to branch dmalcolm/jit:

The relevant code in jit/internal-api.c was moved to jit/jit-playback.c
in 3071567787aef4a8ada8b38c890d01c19b4b998f.

gcc/ChangeLog.jit:
* configure.ac: Update a reference to jit/internal-api.c to
jit/jit-playback.c.
* configure: Regenerate.
---
 gcc/ChangeLog.jit | 6 ++
 gcc/configure | 2 +-
 gcc/configure.ac  | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog.jit b/gcc/ChangeLog.jit
index c887473..340c9de 100644
--- a/gcc/ChangeLog.jit
+++ b/gcc/ChangeLog.jit
@@ -1,3 +1,9 @@
+2014-10-13  David Malcolm  dmalc...@redhat.com
+
+   * configure.ac: Update a reference to jit/internal-api.c to
+   jit/jit-playback.c.
+   * configure: Regenerate.
+
 2014-10-07  David Malcolm  dmalc...@redhat.com
 
* Makefile.in (site.exp): When constructing site.exp, add a line
diff --git a/gcc/configure b/gcc/configure
index 8ec141f..81634f2 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28191,7 +28191,7 @@ _ACEOF
 fi
 
 # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit
-# of jit/internal-api.c.
+# of jit/jit-playback.c.
 cat  gcc-driver-name.h EOF
 #define GCC_DRIVER_NAME ${target_noncanonical}-gcc-${gcc_BASEVER}${exeext}
 EOF
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 62e5ad7..0af7a77 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5680,7 +5680,7 @@ if test x${LINKER_HASH_STYLE} != x; then
 fi
 
 # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit
-# of jit/internal-api.c.
+# of jit/jit-playback.c.
 cat  gcc-driver-name.h EOF
 #define GCC_DRIVER_NAME ${target_noncanonical}-gcc-${gcc_BASEVER}${exeext}
 EOF
-- 
1.7.11.7



[committed] HP-UX 11 reentrant functions

2014-10-13 Thread John David Anglin
Most systems don't guard the C reentrant functions, but on HP-UX 11
they are not declared unless _REENTRANT is defined.  This causes
problems for configure, etc.

The attached change adds the _REENTRANT define when _HPUX_SOURCE is
defined.

A similar change was applied to HP-UX 10 three years ago.

This resolves PR libfortran/63471.

Tested on hppa2.0w-hp-hpux11.11.  Committed to trunk.

Dave
--
John David Anglin   dave.ang...@bell.net


2014-10-13  John David Anglin  dave.ang...@nrc-cnrc.gc.ca

PR libfortran/63471
* config/pa/pa-hpux11.h (TARGET_OS_CPP_BUILTINS): Define _REENTRANT
when _HPUX_SOURCE is defined.

Index: config/pa/pa-hpux11.h
===
--- config/pa/pa-hpux11.h   (revision 216000)
+++ config/pa/pa-hpux11.h   (working copy)
@@ -40,6 +40,7 @@
if (c_dialect_cxx ())   \
  { \
builtin_define (_HPUX_SOURCE);\
+   builtin_define (_REENTRANT);  \
builtin_define (_INCLUDE_LONGLONG);   \
builtin_define (__STDCPP__);  \
  } \
@@ -48,6 +49,7 @@
if (!flag_iso)  \
  { \
builtin_define (_HPUX_SOURCE);\
+   builtin_define (_REENTRANT);  \
if (preprocessing_trad_p ())\
  { \
builtin_define (hp9000s800);  \


Re: PING: Re: [patch] tag ../include/*

2014-10-13 Thread Aldy Hernandez

On 10/13/14 09:27, Jeff Law wrote:

On 10/13/14 08:53, Aldy Hernandez wrote:

PING

On 10/07/14 09:37, Aldy Hernandez wrote:

Is there a reason we don't create etags for toplevel include files?  If
not, could I please apply this patch?

ENOPATCH

jeff


Sorry, patch was in original message.  Attached here.
commit a679529d14f005d8c88517f72d2b5295d8c82f0f
Author: Aldy Hernandez al...@redhat.com
Date:   Tue Oct 7 09:32:21 2014 -0700

* Makefile.in (TAGS): Tag ../include files.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 97b439a..df43b9c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3772,6 +3772,7 @@ TAGS: lang.tags
  fi;   \
done;   \
etags -o TAGS.sub c-family/*.h c-family/*.c *.h *.c *.cc \
+ ../include/*.h \
  --language=none --regex=/\(char\|unsigned 
int\|int\|bool\|void\|HOST_WIDE_INT\|enum [A-Za-z_0-9]+\) 
[*]?\([A-Za-z_0-9]+\)/\2/ common.opt\
  --language=none 
--regex=/\(DEF_RTL_EXPR\|DEFTREECODE\|DEFGSCODE\).*(\([A-Za-z_0-9]+\)/\2/ 
rtl.def tree.def gimple.def \
  --language=none --regex=/DEFTIMEVAR (\([A-Za-z_0-9]+\)/\1/ 
timevar.def \


Re: [RFC: Patch, PR 60102] [4.9/4.10 Regression] powerpc fp-bit ices@dwf_regno

2014-10-13 Thread David Edelsohn
On Mon, Oct 13, 2014 at 9:20 AM, Ulrich Weigand uweig...@de.ibm.com wrote:
 Maciej W. Rozycki wrote:
 On Thu, 9 Oct 2014, Maciej W. Rozycki wrote:

   Seeing Rohit got good results it has struck me that perhaps one of the
  patches I had previously reverted, to be able to compile GCC in the first
  place, interfered with this fix -- I backed out all the subsequent patches
  to test yours and Rohit's by themselves only.  And it was actually the
  case, with this change:
 
  2013-05-21  Christian Bruel  christian.br...@st.com
 
  * dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for
  spanning registers. LEAF_REG_REMAP is supported only for contiguous
  registers. Set register size out of the PARALLEL loop.
 
  back in place, in addition to your fix, I get an all-passed score for
  gdb.base/store.exp.  So your change looks good and my decision to back out
  the other patches unfortunate.  I'll yet run full e500v2 testing now to
  double check, and let you know what the results are, within a couple of
  hours if things work well.

  It took a bit more because I saw some regressions that I wanted to
 investigate.  In the end they turned out intermittent and the failures
 happen sometimes whether your change is applied or not.  So I'm fine with
 your change, thanks for your work and patience.

 Thanks for verifying!

 David, is the patch OK to commit now?

Okay with me.

Thanks!
David


[PATCH 0/5] Merger of jit branch (v2)

2014-10-13 Thread David Malcolm
I'd like to merge the JIT branch into trunk:
  https://gcc.gnu.org/wiki/JIT

This is v2 since it incorporates fixes for the various issues
identified by Joseph in an earlier submission:
  https://gcc.gnu.org/ml/gcc-patches/2014-09/msg02056.html

I've split up the current diff between trunk and the branch into 5
areas for ease of review (and to allow for early merger of the
supporting work, if it's deemed ready):

patch 1: exposes an entrypoint in libiberty that I need
patch 2: configure and Makefile changes in gcc
patch 3: timevar.h: Add an auto_timevar class
patch 4: State cleanups in gcc
patch 5: Add the jit code itself

[this is a diff of trunk r215958 aka
e012cdc775868e9922f5fef9068a764546876d93 which is from 2014-10-06,
vs jit branch version 75b3ee7acdc6de55354d65bb7d619386463e50a1].

I've successfully bootstrapped and regression-tested the cumulative
result of all of the patches against a control build, building them
both with --enable-host-shared, and with
  --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto
adding ,jit to the test build (both on x86_64-unknown-linux-gnu;
Fedora 20).

There were no regressions vs the control build, and the patched build
gains a jit.sum, with 4663 passes (and no failures).

OK for trunk?

Overall diffstat follows:

 ChangeLog.jit  |   23 +
 MAINTAINERS|1 +
 contrib/ChangeLog.jit  |   14 +
 contrib/jit-coverage-report.py |   67 +
 gcc/ChangeLog.jit  |  323 +
 gcc/Makefile.in|9 +
 gcc/cgraph.c   |   14 +
 gcc/cgraph.h   |6 +
 gcc/cgraphunit.c   |   20 +-
 gcc/configure  |   54 +-
 gcc/configure.ac   |   11 +
 gcc/diagnostic.c   |   11 +-
 gcc/doc/install.texi   |2 +-
 gcc/dwarf2out.c|   87 +
 gcc/dwarf2out.h|2 +
 gcc/gcse.c |9 +
 gcc/gcse.h |2 +
 gcc/ggc-page.c |5 +
 gcc/input.c|2 +-
 gcc/ipa-cp.c   |   12 +
 gcc/ipa-pure-const.c   |  113 +-
 gcc/ipa-reference.c|   17 +-
 gcc/ipa-reference.h|1 +
 gcc/java/ChangeLog.jit |   14 +
 gcc/jit/ChangeLog  |9 +
 gcc/jit/ChangeLog.jit  | 3252 ++
 gcc/jit/Make-lang.in   |  297 +
 gcc/jit/TODO.rst   |  119 +
 gcc/jit/config-lang.in |   42 +
 gcc/jit/docs/Makefile  |  153 +
 gcc/jit/docs/_build/texinfo/Makefile   |   50 +
 gcc/jit/docs/_build/texinfo/factorial.png  |  Bin 0 - 183838 bytes
 gcc/jit/docs/_build/texinfo/libgccjit.texi | 6724 
 gcc/jit/docs/_build/texinfo/sum-of-squares.png |  Bin 0 - 22839 bytes
 gcc/jit/docs/conf.py   |  258 +
 gcc/jit/docs/examples/install-hello-world.c|  123 +
 gcc/jit/docs/examples/tut01-square.c   |  107 +
 gcc/jit/docs/examples/tut02-sum-of-squares.c   |  172 +
 gcc/jit/docs/examples/tut03-toyvm/Makefile |   11 +
 gcc/jit/docs/examples/tut03-toyvm/factorial.toy|   50 +
 gcc/jit/docs/examples/tut03-toyvm/fibonacci.toy|   66 +
 gcc/jit/docs/examples/tut03-toyvm/toyvm.c  |  861 +++
 gcc/jit/docs/index.rst |   50 +
 gcc/jit/docs/internals/index.rst   |  157 +
 gcc/jit/docs/intro/factorial.png   |  Bin 0 - 183838 bytes
 gcc/jit/docs/intro/index.rst   |   27 +
 gcc/jit/docs/intro/install.rst |  227 +
 gcc/jit/docs/intro/sum-of-squares.png  |  Bin 0 - 22839 bytes
 gcc/jit/docs/intro/tutorial01.rst  |  349 +
 gcc/jit/docs/intro/tutorial02.rst  |  378 ++
 gcc/jit/docs/intro/tutorial03.rst  | 1108 
 gcc/jit/docs/topics/contexts.rst   |  315 +
 gcc/jit/docs/topics/expressions.rst|  524 ++
 gcc/jit/docs/topics/functions.rst  |  311 +
 gcc/jit/docs/topics/index.rst  |   30 +
 gcc/jit/docs/topics/locations.rst  |   69 +
 gcc/jit/docs/topics/objects.rst|   86 +
 gcc/jit/docs/topics/results.rst|   48 +
 gcc/jit/docs/topics/types.rst  |  217 +
 

[PATCH 2/5] gcc: configure and Makefile changes needed by jit

2014-10-13 Thread David Malcolm
gcc/ChangeLog:
* configure.ac (gcc_version): Expose this value for use via
AC_SUBST, since the jit code needs it within the new file
libgccjit.pc.in.
(doc_build_sys): New variable, set to sphinx if
sphinx is installed, falling back to texinfo otherwise.
(gcc-driver-name.h): Generate a gcc-driver-name.h file containing
GCC_DRIVER_NAME for the benefit of jit/jit-playback.c.
* configure: Regenerate.
* Makefile.in (doc_build_sys): New.
(bindir): New.
(pkgconfigdir): New.
(installdirs): Add creation of $(DESTDIR)$(pkgconfigdir).
(site.exp): When constructing site.exp, add a line to set bindir.
---
 gcc/Makefile.in  |  9 +
 gcc/configure| 54 --
 gcc/configure.ac | 11 +++
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 1dba76f..f5e3d4c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -314,6 +314,11 @@ write_entries_to_file = $(shell rm -f $(2) || :) $(shell 
touch $(2)) \
  $(shell expr $(range) + 
$(write_entries_to_file_split) - 1), $(1)) \
 | tr ' ' '\012'  $(2)))
 
+# The jit documentation looks better if built with sphinx, but can be
+# built with texinfo if sphinx is not available.
+# configure sets doc_build_sys to sphinx or texinfo accordingly
+doc_build_sys=@doc_build_sys@
+
 # 
 # UNSORTED
 # 
@@ -565,6 +570,8 @@ bindir = @bindir@
 libdir = @libdir@
 # Directory in which GCC puts its executables.
 libexecdir = @libexecdir@
+# Directory in which to install .pc files for pkgconfig
+pkgconfigdir = @libdir@/pkgconfig
 
 # 
 # UNSORTED
@@ -3125,6 +3132,7 @@ installdirs:
$(mkinstalldirs) $(DESTDIR)$(infodir)
$(mkinstalldirs) $(DESTDIR)$(man1dir)
$(mkinstalldirs) $(DESTDIR)$(man7dir)
+   $(mkinstalldirs) $(DESTDIR)$(pkgconfigdir)
 
 PLUGIN_HEADERS = $(TREE_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
   toplev.h $(DIAGNOSTIC_CORE_H) $(BASIC_BLOCK_H) $(HASH_TABLE_H) \
@@ -3496,6 +3504,7 @@ site.exp: ./config.status Makefile
@echo # add them to the last section  ./site.tmp
@echo set rootme \`${PWD_COMMAND}`\  ./site.tmp
@echo set srcdir \`cd ${srcdir}; ${PWD_COMMAND}`\  ./site.tmp
+   @echo set bindir \`cd ${bindir}; ${PWD_COMMAND}`\  ./site.tmp
@echo set host_triplet $(host)  ./site.tmp
@echo set build_triplet $(build)  ./site.tmp
@echo set target_triplet $(target)  ./site.tmp
diff --git a/gcc/configure b/gcc/configure
index 380a235..81634f2 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -743,6 +743,7 @@ CXXDEPMODE
 DEPDIR
 am__leading_dot
 CXXCPP
+doc_build_sys
 AR
 NM
 BISON
@@ -824,6 +825,7 @@ build_os
 build_vendor
 build_cpu
 build
+gcc_version
 target_alias
 host_alias
 build_alias
@@ -3040,6 +3042,7 @@ ac_config_headers=$ac_config_headers 
auto-host.h:config.in
 
 gcc_version=`cat $srcdir/BASE-VER`
 
+
 # Determine the host, build, and target systems
 ac_aux_dir=
 for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
@@ -8060,6 +8063,47 @@ fi
 
 fi
 
+# The jit documentation looks better if built with sphinx, but can be
+# built with texinfo if sphinx is not available.
+# Set doc_build_sys to sphinx or texinfo accordingly.
+# Extract the first word of sphinx-build, so it can be a program name with 
args.
+set dummy sphinx-build; ac_word=$2
+{ $as_echo $as_me:${as_lineno-$LINENO}: checking for $ac_word 5
+$as_echo_n checking for $ac_word...  6; }
+if test ${ac_cv_prog_doc_build_sys+set} = set; then :
+  $as_echo_n (cached)  6
+else
+  if test -n $doc_build_sys; then
+  ac_cv_prog_doc_build_sys=$doc_build_sys # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z $as_dir  as_dir=.
+for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f $as_dir/$ac_word$ac_exec_ext  $as_test_x 
$as_dir/$ac_word$ac_exec_ext; }; then
+ac_cv_prog_doc_build_sys=sphinx
+$as_echo $as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext 
5
+break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z $ac_cv_prog_doc_build_sys  ac_cv_prog_doc_build_sys=texinfo
+fi
+fi
+doc_build_sys=$ac_cv_prog_doc_build_sys
+if test -n $doc_build_sys; then
+  { $as_echo $as_me:${as_lineno-$LINENO}: result: $doc_build_sys 5
+$as_echo $doc_build_sys 6; }
+else
+  { $as_echo $as_me:${as_lineno-$LINENO}: result: no 5
+$as_echo no 6; }
+fi
+
+
 
 # 
 # Checks for C headers
@@ -18049,7 +18093,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat  conftest.$ac_ext _LT_EOF
-#line 18052 configure
+#line 18096 configure
 #include confdefs.h
 
 #if HAVE_DLFCN_H
@@ -18155,7 +18199,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat  conftest.$ac_ext _LT_EOF

[PATCH 1/5] libiberty: Expose choose_tmpdir, and fix constness of return type

2014-10-13 Thread David Malcolm
The jit needs to create a temporary directory, so I need to expose
libiberty's choose_tmpdir as a public entrypoint.

include/ChangeLog:
* libiberty.h (choose_tmpdir): New prototype.

libiberty/ChangeLog:
* choose-temp.c (choose_tmpdir): Remove now-redundant local
copy of prototype.
* functions.texi: Regenerate.
* make-temp-file.c (choose_tmpdir): Convert return type from
char * to const char * - given that this returns a pointer to
a memoized allocation, the caller must not touch it.
---
 include/libiberty.h|  5 +
 libiberty/choose-temp.c|  1 -
 libiberty/functions.texi   | 13 ++---
 libiberty/make-temp-file.c |  4 ++--
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/libiberty.h b/include/libiberty.h
index bcc1f9a..d09c9a5 100644
--- a/include/libiberty.h
+++ b/include/libiberty.h
@@ -227,6 +227,11 @@ extern char *make_relative_prefix (const char *, const 
char *,
 extern char *make_relative_prefix_ignore_links (const char *, const char *,
const char *) ATTRIBUTE_MALLOC;
 
+/* Returns a pointer to a directory path suitable for creating temporary
+   files in.  */
+
+extern const char *choose_tmpdir (void) ATTRIBUTE_RETURNS_NONNULL;
+
 /* Choose a temporary directory to use for scratch files.  */
 
 extern char *choose_temp_base (void) ATTRIBUTE_MALLOC 
ATTRIBUTE_RETURNS_NONNULL;
diff --git a/libiberty/choose-temp.c b/libiberty/choose-temp.c
index 0a454cf..8e1e84b 100644
--- a/libiberty/choose-temp.c
+++ b/libiberty/choose-temp.c
@@ -34,7 +34,6 @@ Boston, MA 02110-1301, USA.  */
 #endif
 
 #include libiberty.h
-extern char *choose_tmpdir (void);
 
 /* Name of temporary file.
mktemp requires 6 trailing X's.  */
diff --git a/libiberty/functions.texi b/libiberty/functions.texi
index 9323ff9..387aee0 100644
--- a/libiberty/functions.texi
+++ b/libiberty/functions.texi
@@ -125,7 +125,7 @@ Uses @code{malloc} to allocate storage for @var{nelem} 
objects of
 
 @end deftypefn
 
-@c choose-temp.c:46
+@c choose-temp.c:45
 @deftypefn Extension char* choose_temp_base (void)
 
 Return a prefix for temporary file names or @code{NULL} if unable to
@@ -139,7 +139,7 @@ not recommended.
 @end deftypefn
 
 @c make-temp-file.c:96
-@deftypefn Replacement char* choose_tmpdir ()
+@deftypefn Replacement const char* choose_tmpdir ()
 
 Returns a pointer to a directory path suitable for creating temporary
 files in.
@@ -160,9 +160,8 @@ number of seconds used.
   @dots{}, @code{NULL})
 
 Concatenate zero or more of strings and return the result in freshly
-@code{xmalloc}ed memory.  Returns @code{NULL} if insufficient memory is
-available.  The argument list is terminated by the first @code{NULL}
-pointer encountered.  Pointers to empty strings are ignored.
+@code{xmalloc}ed memory.  The argument list is terminated by the first
+@code{NULL} pointer encountered.  Pointers to empty strings are ignored.
 
 @end deftypefn
 
@@ -528,7 +527,7 @@ nineteen EBCDIC varying characters is tested; exercise 
caution.)
 @end ftable
 @end defvr
 
-@c hashtab.c:336
+@c hashtab.c:328
 @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @
 htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @
 htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @
@@ -1163,7 +1162,7 @@ control over the state of the random number generator.
 
 @end deftypefn
 
-@c concat.c:174
+@c concat.c:160
 @deftypefn Extension char* reconcat (char *@var{optr}, const char *@var{s1}, @
   @dots{}, @code{NULL})
 
diff --git a/libiberty/make-temp-file.c b/libiberty/make-temp-file.c
index 7b74f81..244cc23 100644
--- a/libiberty/make-temp-file.c
+++ b/libiberty/make-temp-file.c
@@ -93,7 +93,7 @@ static char *memoized_tmpdir;
 
 /*
 
-@deftypefn Replacement char* choose_tmpdir ()
+@deftypefn Replacement const char* choose_tmpdir ()
 
 Returns a pointer to a directory path suitable for creating temporary
 files in.
@@ -102,7 +102,7 @@ files in.
 
 */
 
-char *
+const char *
 choose_tmpdir (void)
 {
   if (!memoized_tmpdir)
-- 
1.8.5.3



[PATCH 4/5] State cleanups

2014-10-13 Thread David Malcolm
This patch contains various cleanups needed by the jit, so that we can
rerun the compiler code repeatedly within the same process:

* Introduce per-file finalization routines to clean up file-static
  variables.

* Make various initializations idempotent (init_ggc), or cleanup their
  previous state (init_stringpool).

* Introduce a class toplev, so that we can easily control whether
  timevars are reset each time, or run in a cumulative mode.

gcc/ChangeLog:
* cgraph.c (cgraph_c_finalize): New function.
* cgraph.h (cgraph_c_finalize): New prototype.
(cgraphunit_c_finalize): New prototype.
(ipa_cp_c_finalize): New prototype.
* cgraphunit.c (first_analyzed): Move from analyze_functions
to file-scope.
(first_analyzed_var): Likewise.
(analyze_functions): Move static variables into file-scope.
(cgraphunit_c_finalize): New function.
* diagnostic.c (diagnostic_finish): Free the memory for
context->classify_diagnostic and context->printer, running the
destructor for the latter.
(bt_stop): Use toplev::main.
* dwarf2out.c (dwarf2out_finalize): New function.
* dwarf2out.h (dwarf2out_c_finalize): New prototype.
* gcse.c (gcse_c_finalize): New function.
* gcse.h (gcse_c_finalize): New prototype.
* ggc-page.c (init_ggc): Make idempotent.
* input.c (input_location): Initialize to UNKNOWN_LOCATION.
* ipa-cp.c (ipa_cp_c_finalize): New function.
* ipa-pure-const.c (function_insertion_hook_holder): Move to be
a field of class pass_ipa_pure_const.
(node_duplication_hook_holder): Likewise.
(node_removal_hook_holder): Likewise.
(register_hooks): Convert to method...
(pass_ipa_pure_const::register_hooks): ...here, converting
static variable init_p into...
(pass_ipa_pure_const::init_p): ...new field.
(pure_const_generate_summary): Update invocation of
register_hooks to invoke as a method of current_pass.
(pure_const_read_summary): Likewise.
(propagate): Convert to...
(pass_ipa_pure_const::execute): ...method.
* ipa-reference.c (ipa_init): Move static bool init_p from here
to...
(ipa_init_p): New file-scope variable, so that it can be reset
when repeatedly invoking the compiler within one process by...
(ipa_reference_c_finalize): New function.
* ipa-reference.h (ipa_reference_c_finalize): New.
* main.c (main): Replace invocation of toplev_main with
construction of a toplev instance, and call its main method.
* params.c (global_init_params): Add an assert that params_finished is
false.
(params_c_finalize): New.
* params.h (params_c_finalize): New.
* passes.c (execute_ipa_summary_passes): Set current_pass before
invoking generate_summary, for the benefit of pass_ipa_pure_const.
(ipa_write_summaries_2): Assign pass to current_pass global
before calling write_summary hook.
(ipa_write_optimization_summaries_1): Likewise when calling
write_optimization_summary hook.
(ipa_read_summaries_1): Likewise for read_summary hook.
(ipa_read_optimization_summaries_1): Likewise for
read_optimization_summary hook.
(execute_ipa_stmt_fixups): Likewise.
* stringpool.c (init_stringpool): Clean up if we're called more
than once.
* timevar.c (timevar_init): Ignore repeated calls.
* toplev.c: Include dwarf2out.h, ipa-reference.h, gcse.h.
(general_init): Reset input_location to UNKNOWN_LOCATION.
(initialize_rtl): Move static local initialized_once
into file scope, and rename to...
(rtl_initialized): New variable.
(do_compile): Move timevar initialization from here to
toplev::start_timevars.
(toplev::toplev, toplev::~toplev, toplev::start_timevars,
toplev::finalize): New functions.
(toplev_main): Rename to...
(toplev::main): ...this.
* toplev.h (class toplev): New class.
---
 gcc/cgraph.c |  14 +++
 gcc/cgraph.h |   6 +++
 gcc/cgraphunit.c |  20 -
 gcc/diagnostic.c |  11 -
 gcc/dwarf2out.c  |  87 +++
 gcc/dwarf2out.h  |   2 +
 gcc/gcse.c   |   9 
 gcc/gcse.h   |   2 +
 gcc/ggc-page.c   |   5 +++
 gcc/input.c  |   2 +-
 gcc/ipa-cp.c |  12 ++
 gcc/ipa-pure-const.c | 113 +--
 gcc/ipa-reference.c  |  17 ++--
 gcc/ipa-reference.h  |   1 +
 gcc/main.c   |   6 ++-
 gcc/params.c |  14 +++
 gcc/params.h |   4 ++
 gcc/passes.c |   6 +++
 gcc/stringpool.c |   5 +++
 gcc/timevar.c|   3 ++
 gcc/toplev.c |  67 +++---
 gcc/toplev.h |  19 -
 22 files 

[PATCH 3/5] timevar.h: Add an auto_timevar class

2014-10-13 Thread David Malcolm
This is used in a couple of places in jit/jit-playback.c to ensure
that we pop the timevar on every exit path from a function.

I could rewrite them if need be, but it does simplify things.

Written by Tom Tromey.

gcc/ChangeLog:
* timevar.h (class auto_timevar): New class.
---
 gcc/timevar.h | 24 
 1 file changed, 24 insertions(+)

diff --git a/gcc/timevar.h b/gcc/timevar.h
index 6703cc9..f018e39 100644
--- a/gcc/timevar.h
+++ b/gcc/timevar.h
@@ -110,6 +110,30 @@ timevar_pop (timevar_id_t tv)
 timevar_pop_1 (tv);
 }
 
+// This is a simple timevar wrapper class that pushes a timevar in its
+// constructor and pops the timevar in its destructor.
+class auto_timevar
+{
+ public:
+  auto_timevar (timevar_id_t tv)
+: m_tv (tv)
+  {
+timevar_push (m_tv);
+  }
+
+  ~auto_timevar ()
+  {
+timevar_pop (m_tv);
+  }
+
+ private:
+
+  // Private to disallow copies.
+  auto_timevar (const auto_timevar );
+
+  timevar_id_t m_tv;
+};
+
 extern void print_time (const char *, long);
 
 #endif /* ! GCC_TIMEVAR_H */
-- 
1.8.5.3



Re: PING: Re: [patch] tag ../include/*

2014-10-13 Thread Jeff Law

On 10/13/14 11:29, Aldy Hernandez wrote:

On 10/13/14 09:27, Jeff Law wrote:

On 10/13/14 08:53, Aldy Hernandez wrote:

PING

On 10/07/14 09:37, Aldy Hernandez wrote:

Is there a reason we don't create etags for toplevel include files?  If
not, could I please apply this patch?

ENOPATCH

jeff


Sorry, patch was in original message.  Attached here.

Ok for the trunk.  Sorry it got dropped.
jeff



Re: [PATCH 2/3] PR other/61321 - demangler crash on casts in template parameters

2014-10-13 Thread Cary Coutant
Ping. Jason, do you still think the special-case for conversion ops is
inappropriate?

-cary


On Fri, Jul 25, 2014 at 2:16 AM, Pedro Alves pal...@redhat.com wrote:
 On 07/24/2014 11:35 PM, Cary Coutant wrote:
 It seems that the problem here is more general; a template argument list is
 not in scope within that same template argument list.  Can't we fix that
 without special-casing conversion ops?

 I think conversion ops really are a special case.

 Thanks Cary.  FWIW, I agree.

 (GDB 7.8 hasn't been released yet, though it's close.  If this
 patch is approved as is, we'll be able to have the crash
 fixed there.  If this requires a significant rewrite though,
 I'm afraid I might not be able to do it myself anytime soon.)

 It's the only case
 where the template parameters refer to the template argument list from
 the cast operator's enclosing template. In a cast expression, like
 anywhere else you might have a template parameter, the template
 parameter refers to the template argument list of the immediately
 enclosing template.

 I think this note from Section 5.1.3 (Operator Encodings) of the ABI
 is what makes this a special case (it's an informative comment in the
 document, but seems to me to be normative):

 For a user-defined conversion operator the result type (i.e., the
 type to which the operator converts) is part of the mangled name of
 the function. If the conversion operator is a member template, the
 result type will appear before the template parameters. There may be
 forward references in the result type to the template parameters.


 --
 Thanks,
 Pedro Alves



Re: [gofrontend-dev] Re: [PATCH 03/13] HACK! Allow the static chain to be set from C

2014-10-13 Thread Peter Collingbourne
On Mon, Oct 13, 2014 at 1:10 AM, Richard Biener
richard.guent...@gmail.com wrote:
 On Sat, Oct 11, 2014 at 6:23 AM, Richard Henderson r...@redhat.com wrote:
 On 10/10/2014 06:42 PM, Peter Collingbourne wrote:
 A colleague has suggested a perhaps nicer syntax:

 __builtin_call_chain(pointer, call) where call must be a call expression

 I like this.

 Unlike the other suggestions, it doesn't mess with the parsing of the 
 regular
 part of the function call.  And, depending on what point the builtin is 
 lowered
 and applied to the AST, it might not require any parsing changes at all.

 I'll have a look at this next week.  Thanks.

 Does the frontend know that the call expects a static chain?

The chain is not part of the function type, so no.

Peter


Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code

2014-10-13 Thread H.J. Lu
On Mon, Oct 13, 2014 at 9:32 AM, Evgeny Stupachenko evstu...@gmail.com wrote:
 Reattached.

 On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote:
 On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com 
 wrote:

 ChangeLog for testsuite:

 2014-10-13  Evgeny Stupachenko  evstu...@gmail.com

 PR target/8340
 PR middle-end/47602
 PR rtl-optimization/55458
 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now.
 * gcc.target/i386/pr55458.c: Likewise.
 * gcc.target/i386/pr47602.c: New.
 * gcc.target/i386/pr23098.c: Move to XFAIL.

 Reversed patch was attached. Please repost.

 Uros.

This caused a regression:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63527

-- 
H.J.


Re: [PATCH] cleanup in c-parser

2014-10-13 Thread Jeff Law

On 10/13/14 05:11, Anthony Brandon wrote:

I updated the patch with a comment. Actually, Manuel handed me this
patch just to help me get familiar with the process of submitting and
testing.
Generating this one with git diff looks different so I'm not sure if
that's a problem or not.

This version is fine.  Please install on the trunk.

Thanks,
jeff



Re: [PATCH] Fix typo in comment for IRA

2014-10-13 Thread Jeff Law

On 10/13/14 02:09, Kito Cheng wrote:

Hi all:

This patch contains lots of typo fixes for the IRA module, found by aspell :)

ChangeLog

2014-10-13  Kito Cheng  k...@0xlab.org

 * ira.c: Fix typo in comment.
 * ira.h: Ditto.
 * ira-build.c: Ditto.
 * ira-color.c: Ditto.
 * ira-emit.c: Ditto.
 * ira-int.h: Ditto.
 * ira-lives.c: Ditto.


This is fine for the trunk.  Please install.

Thanks,
Jeff


Re: [PATCH] Fix typo in comment for IRA

2014-10-13 Thread Marc Glisse

On Mon, 13 Oct 2014, Kito Cheng wrote:


- -1 if it is not a cost classe.  */
+ -1 if it is not a cost classes.  */


a cost class, no plural here.

--
Marc Glisse


Re: [PATCH] move many gc hashtab to hash_table

2014-10-13 Thread Trevor Saunders
On Mon, Oct 13, 2014 at 09:02:44AM -0700, H.J. Lu wrote:
 On Mon, Oct 13, 2014 at 8:33 AM, H.J. Lu hjl.to...@gmail.com wrote:
  On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com 
  wrote:
  On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote:
  On 10/06/14 21:24, tsaund...@mozilla.com wrote:
  From: Trevor Saunders tsaund...@mozilla.com
  
  Hi,
  
  This changes almost all of the ggc htab that don't use the if_marked 
  option to
  be hash_tables.  I added a for_user gty attribute so that types could be 
  used
  from user marking routines without either using the mangled names or 
  being
  referenced in a template argument which was previously the only way to 
  get
  gengtype to emit user callable marking routines.
  
  bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc 
  for
  ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and
  x86_64-apple-darwin.  I haven't heard back yet on my request for a cfarm
  account so I haven't tried bootstrapping on other archs, but more 
  testing is
  most welcome. Ok?
  
  Trev
  
  
  gcc/
  
  * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h,
   config/darwin.c, config/m32c/m32c.c, config/mep/mep.c,
   config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c,
   function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c,
   output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c,
   tree-ssa.c, varasm.c: Use hash-table instead of hashtab.
   * doc/gty.texi (for_user): Document new option.
   * gengtype.c (create_user_defined_type): Don't try to get a struct 
   for
   char.
   (walk_type): Don't error out on for_user option.
   (write_func_for_structure): Emit user marking routines if requested 
   by
   for_user option.
   (write_local_func_for_structure): Likewise.
   (main): Mark types with for_user option as used.
   * ggc.h (gt_pch_nx): Add overload for unsigned int.
   * hash-map.h (hash_map::hash_entry::pch_nx_helper): Add overloads.
   * hash-table.h (ggc_hasher): New struct.
   (hash_table::create_ggc): New function.
   (gt_pch_nx): New overload for hash_table.
  
  java/
  
   * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use
   hash_table instead of hashtab.
  
  objc/
  
   * objc-act.c: Use hash_table instead of hashtab.
  
  cp/
  
   * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c,
   pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of
   hashtab.
  
  fortran/
  
   * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab.
  
  c-family/
  
   * c-common.c: Use hash_table instead of hashtab.
  Whee, meaning we no longer have to pass through void * pointers and the
  visually ugly casting that requires in the callbacks?!?  Yea!
 
  yeah, you can even use an iterator now if you like
 
  -++*((unsigned HOST_WIDE_INT *) data);
  +++* data;
  I think coding standards require no space here.  Obviously a nit.
 
  oops, definitely should be fixed :)
 
  This is obviously a fairly mechanical change.  I did some spot checking 
  and
  it looks good.  I don't expect any fallout.
 
  Well, it's large enough I expect *something* but I doubt it'll be a big
  deal.
 
  Good for the trunk.
 
  thanks for the review, I know it was rather big.
 
  This breaks bootstrap on Linux/x86:
 
  https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html
 
  This patch fixes it.
 
 This is what I checked in.

Sorry for the bustage, and thank you!

Trev

 
 
 -- 
 H.J.
 ---
 Index: ChangeLog
 ===
 --- ChangeLog (revision 216150)
 +++ ChangeLog (working copy)
 @@ -1,3 +1,9 @@
 +2014-10-13  H.J. Lu  hongjiu...@intel.com
 +
 + * mangle.c (mangle_conv_op_name_for_type): Cast elements to
 + unsigned long.
 + (print_template_statistics): Cast size and elements to long.
 +
  2014-10-12  Trevor Saunders  tsaund...@mozilla.com
 
   * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c,
 Index: mangle.c
 ===
 --- mangle.c (revision 216150)
 +++ mangle.c (working copy)
 @@ -3744,7 +3744,8 @@ mangle_conv_op_name_for_type (const tree
char buffer[64];
 
 /* Create a unique name corresponding to TYPE.  */
 -  sprintf (buffer, operator %lu, conv_type_names-elements ());
 +  sprintf (buffer, operator %lu,
 +   (unsigned long) conv_type_names-elements ());
identifier = get_identifier (buffer);
*slot = identifier;
 
 Index: pt.c
 ===
 --- pt.c (revision 216150)
 +++ pt.c (working copy)
 @@ -22324,12 +22324,12 @@ void
  print_template_statistics (void)
  {
fprintf (stderr, decl_specializations: size %ld, %ld elements, 
 -   %f collisions\n, decl_specializations-size (),
 -   decl_specializations-elements (),
 +   %f collisions\n, (long) 

[c++-concepts] cleanup expressions

2014-10-13 Thread Andrew Sutton
Sometimes, cleanup_point_exprs are being added to concept definitions.
This patch allows that to happen, but removes the cleanup point during
normalization.

2014-10-13  Andrew Sutton  andrew.n.sut...@gmail.com

Fix bug related to cleanup expressions in concept definitions.
* gcc/cp/constraint.cc (check_function_concept): See through
cleanup handlers when checking the body of a function.
(normalize_cast): Removed. Handled in a default case.
(normalize_cleanup_point): New. Normalize the expression without
the cleanup handler.

Andrew Sutton


Re: [c++-concepts] cleanup expressions

2014-10-13 Thread Andrew Sutton
And here's the patch:


Andrew Sutton


On Mon, Oct 13, 2014 at 3:33 PM, Andrew Sutton
andrew.n.sut...@gmail.com wrote:
 Sometimes, cleanup_point_exprs are being added to concept definitions.
 This patch allows that to happen, but removes the cleanup point during
 normalization.

 2014-10-13  Andrew Sutton  andrew.n.sut...@gmail.com

 Fix bug related to cleanup expressions in concept definitions.
 * gcc/cp/constraint.cc (check_function_concept): See through
 cleanup handlers when checking the body of a function.
 (normalize_cast): Removed. Handled in a default case.
 (normalize_cleanup_point): New. Normalize the expression without
 the cleanup handler.

 Andrew Sutton
Index: constraint.cc
===
--- constraint.cc	(revision 215720)
+++ constraint.cc	(working copy)
@@ -285,6 +285,14 @@ check_function_concept (tree fn)
   tree body = DECL_SAVED_TREE (fn);
   if (TREE_CODE (body) == BIND_EXPR)
 body = BIND_EXPR_BODY (body);
+  
+  // Sometimes a function call results in the creation of clean up
+  // points. Allow these to be preserved in the body of the 
+  // constraint, as we might actually need them for some constexpr
+  // evaluations.
+  if (TREE_CODE (body) == CLEANUP_POINT_EXPR)
+body = TREE_OPERAND(body, 0);
+
   if (TREE_CODE (body) != RETURN_EXPR)
 error_at (loc, function concept definition %qD has multiple statements, 
   fn);
@@ -316,9 +324,9 @@ tree normalize_expr_req (tree);
 tree normalize_type_req (tree);
 tree normalize_nested_req (tree);
 tree normalize_var (tree);
+tree normalize_cleanup_point (tree);
 tree normalize_template_id (tree);
 tree normalize_stmt_list (tree);
-tree normalize_cast (tree);
 tree normalize_atom (tree);
 
 // Reduce the requirement T into a logical formula written in terms of
@@ -383,12 +391,12 @@ normalize_expr (tree t)
 case TEMPLATE_ID_EXPR: 
   return normalize_template_id (t);
 
-case CAST_EXPR:
-  return normalize_cast (t);
-
 case BIND_EXPR:
   return normalize_node (BIND_EXPR_BODY (t));
 
+case CLEANUP_POINT_EXPR:
+  return normalize_cleanup_point (t);
+
 // Do not recurse.
 case TAG_DEFN: 
   return NULL_TREE;
@@ -655,12 +663,11 @@ normalize_requires (tree t)
   return t;
 }
 
-// Normalize a cast expression.
+// Normalize a cleanup point by normalizing the underlying
+// expression.
 tree
-normalize_cast (tree t) 
-{
-  // return normalize_node (TREE_VALUE (TREE_OPERAND (t, 0)));
-  return normalize_atom (t);
+normalize_cleanup_point (tree t) {
+  return normalize_node (TREE_OPERAND (t, 0));
 }
 
 // Normalize an atomic expression by performing some basic checks.


[c++-concepts] introduction syntax regression

2014-10-13 Thread Andrew Sutton
The original patch for concept introductions was not popping a
deferred access check. This fixes that problem, although I'm not sure
if we need to defer access checks at all.

2014-10-13  Andrew Sutton  andrew.n.sut...@gmail.com

Fix regression related to concept introductions.
* gcc/cp/parser.c (cp_parser_template_declaration_after_exp):
Pop deferred access checks after parsing the introduction.


Andrew Sutton
Index: parser.c
===
--- parser.c	(revision 214991)
+++ parser.c	(working copy)
@@ -24409,19 +24409,21 @@ cp_parser_template_declaration_after_exp
 	= current_template_reqs;
 	}
 }
-  else if(flag_concepts)
+  else if (flag_concepts)
 {
   need_lang_pop = false;
   checks = NULL;
   saved_template_reqs = release (current_template_reqs);
-  push_deferring_access_checks (dk_deferred);
 
   // Scope may be changed by a nested-name-specifier.
   tree saved_scope = parser-scope;
   tree saved_qualifying_scope = parser-qualifying_scope;
   tree saved_object_scope = parser-object_scope;
 
+  push_deferring_access_checks (dk_deferred);
   parameter_list = cp_parser_template_introduction (parser);
+  pop_deferring_access_checks ();
+
   if (parameter_list == error_mark_node)
 {
 	  // Restore template requirements before returning.


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [14/n] Optimize constant checks

2014-10-13 Thread Ilya Enkovich
On 09 Oct 11:13, Jeff Law wrote:
 On 10/08/14 13:21, Ilya Enkovich wrote:
 Hi,
 
 This patch adds a removal of checks known to always pass into checker 
 optimization.
 
 Thanks,
 Ilya
 --
 2014-10-08  Ilya Enkovich  ilya.enkov...@intel.com
 
  * tree-chkp.c (chkp_remove_check_if_pass): New.
  (chkp_remove_constant_checks): New.
  (chkp_opt_execute): Run constant check removal
  algorithm.
 So again, I'd like to see all the optimization stuff pulled into its
 own file and basic tests that we can use for smoke testing now
 and in the future.
 
 
 
 
 +  else if (result == -1)
 +{
 +  if (dump_file  (dump_flags  TDF_DETAILS))
 +fprintf (dump_file,   action: keep check (always fail)\n);
 +}
 ISTM this case should generate a compile-time warning.  We've just
 determined statically that this test is always going to fail, right?
 
 +  /* Iterate throw all found checks in BB.  */
 s/throw/through/
 
 With the changes above, this will be OK for the trunk.
 
 
 Jeff

Thanks for review!  Here is a version with a warning and a couple of tests 
added.

Ilya
--
gcc/

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* tree-chkp-opt.c: Include diagnostic.h.
(chkp_remove_check_if_pass): New.
(chkp_remove_constant_checks): New.
(chkp_opt_execute): Run constant check removal
algorithm.
* c-family/c.opt (Wchkp): New.

gcc/testsuite/

2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* gcc.target/i386/chkp-const-check-1.c: New.
* gcc.target/i386/chkp-const-check-2.c: New.


diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 1ca5a95..5202e3c 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -323,6 +323,10 @@ Wchar-subscripts
 C ObjC C++ ObjC++ Var(warn_char_subscripts) Warning LangEnabledBy(C ObjC C++ 
ObjC++,Wall)
 Warn about subscripts whose type is \char\
 
+Wchkp
+C ObjC C++ ObjC++ Var(warn_chkp) Warning EnabledBy(Wall)
+Warn about memory access errors found by Pointer Bounds Checker
+
 Wclobbered
 C ObjC C++ ObjC++ Var(warn_clobbered) Warning EnabledBy(Wextra)
 Warn about variables that might be changed by \longjmp\ or \vfork\
diff --git a/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c 
b/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c
new file mode 100644
index 000..8c90239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp -O2 
-fdump-tree-chkpopt } */
+/* { dg-final { scan-tree-dump-not bndcl chkpopt } } */
+/* { dg-final { scan-tree-dump-not bndcu chkpopt } } */
+
+
+int test (int *p)
+{
+  p = (int *)__builtin___bnd_set_ptr_bounds (p, sizeof (int));
+  return *p;
+}
diff --git a/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c 
b/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c
new file mode 100644
index 000..ab573eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp -O2 -Wchkp } */
+
+int test (int *p)
+{
+  p = (int *)__builtin___bnd_set_ptr_bounds (p, sizeof (int));
+  return *(p + 1); /* { dg-warning memory access check always fail  } */
+}
diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c
index 620df47..5112769 100644
--- a/gcc/tree-chkp-opt.c
+++ b/gcc/tree-chkp-opt.c
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
 #include gimplify-me.h
 #include expr.h
 #include tree-chkp.h
+#include diagnostic.h
 
 enum check_type
 {
@@ -693,6 +694,48 @@ chkp_get_check_result (struct check_info *ci, tree bounds)
   return res;
 }
 
+/* Try to compare bounds value and address value
+   used in the check CI.  If we can prove that check
+   always pass then remove it.  */
+static void
+chkp_remove_check_if_pass (struct check_info *ci)
+{
+  int result = 0;
+
+  if (dump_file  (dump_flags  TDF_DETAILS))
+{
+  fprintf (dump_file, Trying to remove check: );
+  print_gimple_stmt (dump_file, ci-stmt, 0, 0);
+}
+
+  result = chkp_get_check_result (ci, ci-bounds);
+
+  if (result == 1)
+{
+  gimple_stmt_iterator i = gsi_for_stmt (ci-stmt);
+
+  if (dump_file  (dump_flags  TDF_DETAILS))
+   fprintf (dump_file,   action: delete check (always pass)\n);
+
+  gsi_remove (i, true);
+  unlink_stmt_vdef (ci-stmt);
+  release_defs (ci-stmt);
+  ci-stmt = NULL;
+}
+  else if (result == -1)
+{
+  if (dump_file  (dump_flags  TDF_DETAILS))
+   fprintf (dump_file,   action: keep check (always fail)\n);
+  warning_at (gimple_location (ci-stmt), OPT_Wchkp,
+ memory access check always fail);
+}
+  else if (result == 0)
+{
+  if (dump_file  (dump_flags  TDF_DETAILS))
+   fprintf (dump_file,   action: keep check (cannot compute result)\n);
+}
+}
+
 /* For bounds used in CI check if bounds are produced by
intersection and we may use 

Re: [Patch] Fix PR61889 for the w64-mingw32 case

2014-10-13 Thread Xinliang David Li
Honza, not sure if this patch is ideal, but this will unblock mingw
build problems. Can this one get in?

thanks,

David

On Wed, Sep 24, 2014 at 8:22 AM, Rainer Emrich
rai...@emrich-ebersheim.de wrote:
 The following patch fixes PR61889 for x86_64-w64-mingw32. Details can be found
 on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61889

 The patch was bootstrapped on x86_64-w64-mingw32.

 If the patch is ok, Kai would you apply, please?

 Rainer

 2014-09-24  Rainer Emrich  rai...@emrich-ebersheim.de

 PR gcov-profile/61889
 * gcc/gcov-tool.c: Remove wrong #if !defined(_WIN32)
 * libgcc/libgcov-driver-system.c: undefine clashing macro for mkdir


 Index: gcc/gcov-tool.c
 ===
 --- gcc/gcov-tool.c (Revision 215554)
 +++ gcc/gcov-tool.c (Arbeitskopie)
 @@ -89,11 +89,7 @@ gcov_output_files (const char *out, stru
/* Try to make directory if it doesn't already exist.  */
if (access (out, F_OK) == -1)
  {
 -#if !defined(_WIN32)
if (mkdir (out, S_IRWXU | S_IRWXG | S_IRWXO) == -1  errno != EEXIST)
 -#else
 -  if (mkdir (out) == -1  errno != EEXIST)
 -#endif
  fatal_error (Cannot make directory %s, out);
  } else
unlink_profile_dir (out);
 Index: libgcc/libgcov-driver-system.c
 ===
 --- libgcc/libgcov-driver-system.c  (Revision 215554)
 +++ libgcc/libgcov-driver-system.c  (Arbeitskopie)
 @@ -66,6 +66,9 @@ create_file_directory (char *filename)
  #ifdef TARGET_POSIX_IO
   mkdir (filename, 0755) == -1
  #else
 +#ifdef mkdir
 +#undef mkdir
 +#endif
   mkdir (filename) == -1
  #endif
  /* The directory might have been made by another process.  */


Re: RFA: fix mode confusion in caller-save.c:replace_reg_with_saved_mem

2014-10-13 Thread Jeff Law

On 10/11/14 03:32, Joern Rennecke wrote:

On 10 October 2014 21:13, Jeff Law l...@redhat.com wrote:
...

ISTM it would be better to find the mode of the same class that corresponds
to GET_MODE_SIZE (mode) / nregs.  In your case that's obviously QImode :-)


Like this?
Or did you mean to remove the save_mode[regno] use altogether?  I can
think of arguments for or against, but got no
concrete examples for either.
Yea, that's basically what I was thinking.  I probably wouldn't have 
bothered with the if (hard_regno ...) check, but I can see why you might 
want that added measure of safety before slamming in a new mode.


I think you want smode in the mode_for_size call rather than mode, 
right (both instances)?


If that fixes your problem and passes the usual bootstrap and 
regression test, then it's OK with me.


I can see Richard S. getting in here one day and saying, umm, this all 
needs further refinement, but at least this hunk of code is in better shape.


jeff




Re: [C++] Handle || ! for simd vectors

2014-10-13 Thread Marc Glisse

Ping https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00361.html
(sorry that my message looked like I had committed as obvious)

On Sat, 4 Oct 2014, Marc Glisse wrote:


On Thu, 2 Oct 2014, Jason Merrill wrote:


OK.


Thanks. While committing, I noticed that I restricted ! to integer vectors, 
whereas it seems to work just fine with scalar floats, so it would make sense 
to extend it to float vectors. Tested on x86_64-linux-gnu.


2014-10-04  Marc Glisse  marc.gli...@inria.fr

gcc/cp/
* typeck.c (cp_build_unary_op) [TRUTH_NOT_EXPR]: Accept float vectors.
gcc/testsuite/
* g++.dg/ext/vector9.C: Test ! with float vectors.


--
Marc Glisse


Re: [PATCH, Pointer Bounds Checker 14/x] Passes [2/n] IPA passes

2014-10-13 Thread Jeff Law

On 10/13/14 04:48, Ilya Enkovich wrote:

--
2014-10-13  Ilya Enkovich  ilya.enkov...@intel.com

* ipa-chkp.c: New.
* ipa-chkp.h: New.
* Makefile.in (OBJS): Add ipa-chkp.o.
* c-family/c-common.c (c_common_attribute_table): Fix documentation.
(c_common_format_attribute_table): Likewise.


OK for the trunk.

jeff



Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition

2014-10-13 Thread Jeff Law

On 10/11/14 06:44, Felix Yang wrote:

Hello Jeff,

 I see that you have improved the RTL typesafety issue for ira.c,
so I rebased this patch
 on the latest trunk and change to use the new list walking interface.
 Bootstrapped on x86_64-SUSE-Linux and make check regression tested.
 OK for trunk?

OK for the trunk.

Thanks for your patience.

jeff



[PATCH] Handle CFA adjust notes in combine stack adjustments

2014-10-13 Thread Richard Henderson
I was hitting this during a i386 bootstrap, wherein csa managed to combine two
insns that both had REG_CFA_ADJUST_CFA notes, but didn't modify the notes.
This led to an abort later in dwarf2cfi, when the inconsistency was noticed.

I'm not quite sure why this was happening on my branch but not mainline, but
I'm applying it here anyway.

Tested on i686 and x86_64.


r~
2014-10-13  Richard Henderson  r...@redhat.com

* combine-stack-adj.c (no_unhandled_cfa): New.
(maybe_merge_cfa_adjust): New.
(combine_stack_adjustments_for_block): Use them.

* g++.dg/torture/20141013.C: New.

diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
index aebdf87..844873c 100644
--- a/gcc/combine-stack-adj.c
+++ b/gcc/combine-stack-adj.c
@@ -190,6 +190,44 @@ record_one_stack_ref (rtx_insn *insn, rtx *ref, struct 
csa_reflist *next_reflist
   return ml;
 }
 
+/* We only know how to adjust the CFA; no other frame-related changes
+   may appear in any insn to be deleted.  */
+
+static bool
+no_unhandled_cfa (rtx_insn *insn)
+{
+  if (!RTX_FRAME_RELATED_P (insn))
+return true;
+
+  /* No CFA notes at all is a legacy interpretation like
+ FRAME_RELATED_EXPR, and is context sensitive within
+ the prologue state machine.  We can't handle that here.  */
+  bool has_cfa_adjust = false;
+
+  for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1))
+switch (REG_NOTE_KIND (link))
+  {
+  default:
+break;
+  case REG_CFA_ADJUST_CFA:
+   has_cfa_adjust = true;
+   break;
+
+  case REG_FRAME_RELATED_EXPR:
+  case REG_CFA_DEF_CFA:
+  case REG_CFA_OFFSET:
+  case REG_CFA_REGISTER:
+  case REG_CFA_EXPRESSION:
+  case REG_CFA_RESTORE:
+  case REG_CFA_SET_VDRAP:
+  case REG_CFA_WINDOW_SAVE:
+  case REG_CFA_FLUSH_QUEUE:
+   return false;
+  }
+
+  return has_cfa_adjust;
+}
+
 /* Attempt to apply ADJUST to the stack adjusting insn INSN, as well
as each of the memories and stack references in REFLIST.  Return true
on success.  */
@@ -320,6 +358,44 @@ maybe_move_args_size_note (rtx_insn *last, rtx_insn *insn, 
bool after)
 add_reg_note (last, REG_ARGS_SIZE, XEXP (note, 0));
 }
 
+/* Merge any REG_CFA_ADJUST_CFA note from SRC into DST.
+   AFTER is true iff DST follows SRC in the instruction stream.  */
+
+static void
+maybe_merge_cfa_adjust (rtx_insn *dst, rtx_insn *src, bool after)
+{
+  rtx snote = NULL, dnote = NULL;
+  rtx sexp, dexp;
+  rtx exp1, exp2;
+
+  if (RTX_FRAME_RELATED_P (src))
+snote = find_reg_note (src, REG_CFA_ADJUST_CFA, NULL_RTX);
+  if (snote == NULL)
+return;
+  sexp = XEXP (snote, 0);
+
+  if (RTX_FRAME_RELATED_P (dst))
+dnote = find_reg_note (dst, REG_CFA_ADJUST_CFA, NULL_RTX);
+  if (dnote == NULL)
+{
+  add_reg_note (dst, REG_CFA_ADJUST_CFA, sexp);
+  return;
+}
+  dexp = XEXP (dnote, 0);
+
+  gcc_assert (GET_CODE (sexp) == SET);
+  gcc_assert (GET_CODE (dexp) == SET);
+
+  if (after)
+exp1 = dexp, exp2 = sexp;
+  else
+exp1 = sexp, exp2 = dexp;
+
+  SET_SRC (exp1) = simplify_replace_rtx (SET_SRC (exp1), SET_DEST (exp2),
+SET_SRC (exp2));
+  XEXP (dnote, 0) = exp1;
+}
+
 /* Return the next (or previous) active insn within BB.  */
 
 static rtx_insn *
@@ -491,12 +567,15 @@ combine_stack_adjustments_for_block (basic_block bb)
  /* Combine an allocation into the first instruction.  */
  if (STACK_GROWS_DOWNWARD ? this_adjust = 0 : this_adjust = 0)
{
- if (try_apply_stack_adjustment (last_sp_set, reflist,
- last_sp_adjust + this_adjust,
- this_adjust))
+ if (no_unhandled_cfa (insn)
+  try_apply_stack_adjustment (last_sp_set, reflist,
+last_sp_adjust
++ this_adjust,
+this_adjust))
{
  /* It worked!  */
  maybe_move_args_size_note (last_sp_set, insn, false);
+ maybe_merge_cfa_adjust (last_sp_set, insn, false);
  delete_insn (insn);
  last_sp_adjust += this_adjust;
  continue;
@@ -508,12 +587,15 @@ combine_stack_adjustments_for_block (basic_block bb)
  else if (STACK_GROWS_DOWNWARD
   ? last_sp_adjust = 0 : last_sp_adjust = 0)
{
- if (try_apply_stack_adjustment (insn, reflist,
- last_sp_adjust + this_adjust,
- -last_sp_adjust))
+ if (no_unhandled_cfa (last_sp_set)
+  try_apply_stack_adjustment (insn, reflist

  1   2   >