Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly
On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Hi, The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. Bootstrap and make check are in progress. The patch boosts (up to 2,5 times improve) several benchmarks compiled with -Ofast on Silvermont Spec2000: +5% gain on 173.applu +1% gain on 255.vortex Is it ok for trunk when pass bootstrap and make check? This is only a 20% increase - from 100 to 120. I would instead suggest to explore doing this change unconditionally if it helps that much. Richard. Thanks, Evgeny 2014-10-10 Evgeny Stupachenko evstu...@gmail.com * config/i386/i386.c (ix86_option_override_internal): Increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates CPUs with high branch cost. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6337aa5..5ac10eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, opts-x_param_values, opts_set-x_param_values); + /* Extend full peel max insns parameter for CPUs with high branch cost. */ + if (TARGET_HIGH_BRANCH_COST) +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, + 120, + opts-x_param_values, + opts_set-x_param_values); + + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
*/ if (opts-x_flag_prefetch_loop_arrays 0 HAVE_prefetch diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2c64162..da0c57b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] #define TARGET_SCHEDULEix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index b6b210e..04d8bf8 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit, m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | m_ATHLON_K8 | m_AMDFAM10) +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This could be + used to tune unroll, if-cvt, inline... heuristics. */ +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost, + m_BONNELL | m_SILVERMONT | m_INTEL) + /*/ /* Integer instruction selection tuning */ /*/
[PATCH] Fix typo in comment for IRA
Hi all: This patch contain lots typo fix for IRA module by aspell :) ChangeLog 2014-10-13 Kito Cheng k...@0xlab.org * ira.c: Fix typo in comment. * ira.h: Ditto. * ira-build.c: Ditto. * ira-color.c: Ditto. * ira-emit.c: Ditto. * ira-int.h: Ditto. * ira-lives.c: Ditto. From e7268d1f6e3367a345b2e614a21e596c6ccf621f Mon Sep 17 00:00:00 2001 From: Kito Cheng k...@andestech.com Date: Fri, 22 Aug 2014 16:27:18 +0800 Subject: [PATCH] Fix typo in comment for IRA 2014-10-13 Kito Cheng k...@0xlab.org * ira.c: Fix typo in comment. * ira.h: Ditto. * ira-build.c: Ditto. * ira-color.c: Ditto. * ira-emit.c: Ditto. * ira-int.h: Ditto. * ira-lives.c: Ditto. --- gcc/ira-build.c | 2 +- gcc/ira-color.c | 10 +- gcc/ira-costs.c | 8 gcc/ira-emit.c | 4 ++-- gcc/ira-int.h | 4 ++-- gcc/ira-lives.c | 2 +- gcc/ira.c | 24 gcc/ira.h | 2 +- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/gcc/ira-build.c b/gcc/ira-build.c index 9c99166..98df8cd 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -1224,7 +1224,7 @@ ira_create_pref (ira_allocno_t a, int hard_regno, int freq) return pref; } -/* Attach a pref PREF to the cooresponding allocno. */ +/* Attach a pref PREF to the corresponding allocno. */ static void add_allocno_pref_to_list (ira_pref_t pref) { diff --git a/gcc/ira-color.c b/gcc/ira-color.c index 841d0d1..a25022f 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -104,7 +104,7 @@ struct update_cost_record struct allocno_color_data { /* TRUE value means that the allocno was not removed yet from the - conflicting graph during colouring. */ + conflicting graph during coloring. */ unsigned int in_graph_p : 1; /* TRUE if it is put on the stack to make other allocnos colorable. */ @@ -1203,7 +1203,7 @@ struct update_cost_queue_elem connecting this allocno to the one being allocated. */ int divisor; - /* Allocno from which we are chaning costs of connected allocnos. + /* Allocno from which we are chaining costs of connected allocnos. 
It is used not go back in graph of allocnos connected by copies. */ ira_allocno_t from; @@ -1928,7 +1928,7 @@ copy_freq_compare_func (const void *v1p, const void *v2p) if (pri2 - pri1) return pri2 - pri1; - /* If freqencies are equal, sort by copies, so that the results of + /* If frequencies are equal, sort by copies, so that the results of qsort leave nothing to chance. */ return cp1-num - cp2-num; } @@ -1983,7 +1983,7 @@ merge_threads (ira_allocno_t t1, ira_allocno_t t2) ALLOCNO_COLOR_DATA (t1)-thread_freq += ALLOCNO_COLOR_DATA (t2)-thread_freq; } -/* Create threads by processing CP_NUM copies from sorted)ciopeis. We +/* Create threads by processing CP_NUM copies from sorted copies. We process the most expensive copies first. */ static void form_threads_from_copies (int cp_num) @@ -3606,7 +3606,7 @@ conflict_by_live_ranges_p (int regno1, int regno2) ira_assert (regno1 = FIRST_PSEUDO_REGISTER regno2 = FIRST_PSEUDO_REGISTER); - /* Reg info caclulated by dataflow infrastructure can be different + /* Reg info calculated by dataflow infrastructure can be different from one calculated by regclass. */ if ((a1 = ira_loop_tree_root-regno_allocno_map[regno1]) == NULL || (a2 = ira_loop_tree_root-regno_allocno_map[regno2]) == NULL) diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c index 38d0e0e..5ecbc0b 100644 --- a/gcc/ira-costs.c +++ b/gcc/ira-costs.c @@ -118,7 +118,7 @@ struct cost_classes /* Container of the cost classes. */ enum reg_class classes[N_REG_CLASSES]; /* Map reg class - index of the reg class in the previous array. - -1 if it is not a cost classe. */ + -1 if it is not a cost classes. */ int index[N_REG_CLASSES]; /* Map hard regno index of first class in array CLASSES containing the hard regno, -1 otherwise. */ @@ -277,7 +277,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass) decrease number of cost classes for the pseudo, if hard registers of some important classes can not hold a value of MODE. 
So the pseudo can not get hard register of some important classes and cost - calculation for such important classes is only waisting CPU + calculation for such important classes is only wasting CPU time. */ static void setup_regno_cost_classes_by_mode (int regno, enum machine_mode mode) @@ -314,7 +314,7 @@ setup_regno_cost_classes_by_mode (int regno, enum machine_mode mode) regno_cost_classes[regno] = classes_ptr; } -/* Finilize info about the cost classes for each pseudo. */ +/* Finalize info about the cost classes for each pseudo. */ static void finish_regno_cost_classes (void) { @@ -1238,7 +1238,7 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) then we may want to adjust the cost of that register
Re: [gofrontend-dev] Re: [PATCH 03/13] HACK! Allow the static chain to be set from C
On Sat, Oct 11, 2014 at 6:23 AM, Richard Henderson r...@redhat.com wrote: On 10/10/2014 06:42 PM, Peter Collingbourne wrote: A colleague has suggested a perhaps nicer syntax: __builtin_call_chain(pointer, call) where call must be a call expression. I like this. Unlike the other suggestions, it doesn't mess with the parsing of the regular part of the function call. And, depending on what point the builtin is lowered and applied to the AST, it might not require any parsing changes at all. I'll have a look at this next week. Thanks. Does the frontend know that the call expects a static chain? If so I like Ian's suggestion more: How crazy would it be to move __builtin_call_chain into the function arguments, as in function(a1, a2, __builtin_call_chain(pointer)) This __builtin_call_chain call would be removed from the argument list so type checking would only look at a1, a2. It would just set the static chain value. That at least puts the call_chain in the right place, which is a special kind of function argument. Richard. r~
[Ada] Fix PR ada/63225
As reported by Alan, the Ada compiler doesn't build anymore if you compile it with -fno-inline because cuintp.c references a private function of uintp.adb. Tested on x86_64-suse-linux, applied on all active branches. 2014-10-13 Eric Botcazou ebotca...@adacore.com Alan Modra amo...@gmail.com PR ada/63225 * uintp.adb (Vector_To_Uint): Move from here to... * uintp.ads (UI_Vector): Make public. (Vector_To_Uint): ...here. -- Eric BotcazouIndex: uintp.ads === --- uintp.ads (revision 216116) +++ uintp.ads (working copy) @@ -90,6 +90,18 @@ package Uintp is Uint_Minus_80 : constant Uint; Uint_Minus_128 : constant Uint; + type UI_Vector is array (Pos range ) of Int; + -- Vector containing the integer values of a Uint value + + -- Note: An earlier version of this package used pointers of arrays of Ints + -- (dynamically allocated) for the Uint type. The change leads to a few + -- less natural idioms used throughout this code, but eliminates all uses + -- of the heap except for the table package itself. For example, Uint + -- parameters are often converted to UI_Vectors for internal manipulation. + -- This is done by creating the local UI_Vector using the function N_Digits + -- on the Uint to find the size needed for the vector, and then calling + -- Init_Operand to copy the values out of the table into the vector. + - -- Subprograms -- - @@ -252,6 +264,22 @@ package Uintp is -- function is used for capacity checks, and it can be one bit off -- without affecting its usage. + function Vector_To_Uint + (In_Vec : UI_Vector; + Negative : Boolean) return Uint; + -- Functions that calculate values in UI_Vectors, call this function to + -- create and return the Uint value. In_Vec contains the multiple precision + -- (Base) representation of a non-negative value. Leading zeroes are + -- permitted. Negative is set if the desired result is the negative of the + -- given value. 
The result will be either the appropriate directly + -- represented value, or a table entry in the proper canonical format is + -- created and returned. + -- + -- Note that Init_Operand puts a signed value in the result vector, but + -- Vector_To_Uint is always presented with a non-negative value. The + -- processing of signs is something that is done by the caller before + -- calling Vector_To_Uint. + - -- Output Routines -- - @@ -494,18 +522,6 @@ private -- UI_Vector is defined for this purpose and some internal subprograms -- used for converting from one to the other are defined. - type UI_Vector is array (Pos range ) of Int; - -- Vector containing the integer values of a Uint value - - -- Note: An earlier version of this package used pointers of arrays of Ints - -- (dynamically allocated) for the Uint type. The change leads to a few - -- less natural idioms used throughout this code, but eliminates all uses - -- of the heap except for the table package itself. For example, Uint - -- parameters are often converted to UI_Vectors for internal manipulation. - -- This is done by creating the local UI_Vector using the function N_Digits - -- on the Uint to find the size needed for the vector, and then calling - -- Init_Operand to copy the values out of the table into the vector. - type Uint_Entry is record Length : Pos; -- Length of entry in Udigits table in digits (i.e. in words) Index: uintp.adb === --- uintp.adb (revision 216116) +++ uintp.adb (working copy) @@ -171,22 +171,6 @@ package body Uintp is -- If Discard_Quotient is True, Quotient is set to No_Uint -- If Discard_Remainder is True, Remainder is set to No_Uint - function Vector_To_Uint - (In_Vec : UI_Vector; - Negative : Boolean) return Uint; - -- Functions that calculate values in UI_Vectors, call this function to - -- create and return the Uint value. In_Vec contains the multiple precision - -- (Base) representation of a non-negative value. Leading zeroes are - -- permitted. 
Negative is set if the desired result is the negative of the - -- given value. The result will be either the appropriate directly - -- represented value, or a table entry in the proper canonical format is - -- created and returned. - -- - -- Note that Init_Operand puts a signed value in the result vector, but - -- Vector_To_Uint is always presented with a non-negative value. The - -- processing of signs is something that is done by the caller before - -- calling Vector_To_Uint. - -- Direct --
Re: [PATCH] PR debug/38757 gcc does not emit DW_LANG_C99
On Wed, 2014-10-08 at 12:13 +0200, Mark Wielaard wrote: This patch was written a long time ago by Jakub and has been in Fedora gcc for some time. All I did was rebase it to current gcc trunk and add a testcase. Back when it was originally proposed the issue was that because DWARF was generated late adding new lang hooks for this was problematic for LTO. Now that there is a move towards generating DWARF early I am hoping this patch can finally make it to mainline gcc. Ping. This lang hook will be more useful when DWARFv5 gets out, which is supposed to define language identifiers for newer C and C++ versions. Previous discussions: http://gcc.gnu.org/ml/gcc-patches/2009-03/msg00858.html http://gcc.gnu.org/ml/gcc-patches/2010-04/msg00991.html gcc/ChangeLog 2009-03-18 Jakub Jelinek ja...@redhat.com PR debug/38757 * langhooks.h (struct lang_hooks): Add source_language langhook. * langhooks-def.h (LANG_HOOKS_SOURCE_LANGUAGE): Define to NULL. (LANG_HOOKS_INITIALIZER): Add LANG_HOOKS_SOURCE_LANGUAGE. * dwarf2out.c (add_prototyped_attribute): Add DW_AT_prototype also for DW_LANG_{C,C99,ObjC}. (gen_compile_unit_die): Use lang_hooks.source_language () to determine if DW_LANG_C99 or DW_LANG_C89 should be returned. gcc/c/ChangeLog 2009-03-18 Jakub Jelinek ja...@redhat.com PR debug/38757 * c-lang.c (c_source_language): New function. (LANG_HOOKS_SOURCE_LANGUAGE): Define. gcc/testsuite/ChangeLog 2014-10-08 Mark Wielaard m...@redhat.com PR debug/38757 * gcc.dg/debug/dwarf2/lang-c89.c: New test. * gcc.dg/debug/dwarf2/lang-c99.c: Likewise. 
--- gcc/ChangeLog| 11 +++ gcc/c/ChangeLog | 6 ++ gcc/c/c-lang.c | 8 gcc/dwarf2out.c | 19 --- gcc/langhooks-def.h | 4 +++- gcc/langhooks.h | 4 gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c | 6 ++ gcc/testsuite/gcc.dg/debug/dwarf2/lang-c99.c | 6 ++ 9 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2/lang-c99.c diff --git a/gcc/c/c-lang.c b/gcc/c/c-lang.c index 97c0443..dadeb1a 100644 --- a/gcc/c/c-lang.c +++ b/gcc/c/c-lang.c @@ -35,6 +35,12 @@ along with GCC; see the file COPYING3. If not see enum c_language_kind c_language = clk_c; +static int +c_source_language (void) +{ + return flag_isoc99 ? 1999 : 1989; +} + /* Lang hooks common to C and ObjC are declared in c-objc-common.h; consequently, there should be very few hooks below. */ @@ -44,6 +50,8 @@ enum c_language_kind c_language = clk_c; #define LANG_HOOKS_INIT c_objc_common_init #undef LANG_HOOKS_INIT_TS #define LANG_HOOKS_INIT_TS c_common_init_ts +#undef LANG_HOOKS_SOURCE_LANGUAGE +#define LANG_HOOKS_SOURCE_LANGUAGE c_source_language /* Each front end provides its own lang hook initializer. */ struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 59c05ed..4932cd0 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -16708,9 +16708,18 @@ add_bit_size_attribute (dw_die_ref die, tree decl) static inline void add_prototyped_attribute (dw_die_ref die, tree func_type) { - if (get_AT_unsigned (comp_unit_die (), DW_AT_language) == DW_LANG_C89 - prototype_p (func_type)) -add_AT_flag (die, DW_AT_prototyped, 1); + switch (get_AT_unsigned (comp_unit_die (), DW_AT_language)) +{ +case DW_LANG_C: +case DW_LANG_C89: +case DW_LANG_C99: +case DW_LANG_ObjC: + if (prototype_p (func_type)) + add_AT_flag (die, DW_AT_prototyped, 1); + break; +default: + break; +} } /* Add an 'abstract_origin' attribute below a given DIE. 
The DIE is found @@ -19544,6 +19553,10 @@ gen_compile_unit_die (const char *filename) language = DW_LANG_ObjC; else if (strcmp (language_string, GNU Objective-C++) == 0) language = DW_LANG_ObjC_plus_plus; + else if (strcmp (language_string, GNU C) == 0 + lang_hooks.source_language + lang_hooks.source_language () = 1999) + language = DW_LANG_C99; else if (dwarf_version = 5 || !dwarf_strict) { if (strcmp (language_string, GNU Go) == 0) diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h index e5ae3e3..b6c8dd4 100644 --- a/gcc/langhooks-def.h +++ b/gcc/langhooks-def.h @@ -120,6 +120,7 @@ extern bool lhd_omp_mappable_type (tree); #define LANG_HOOKS_BLOCK_MAY_FALLTHRUhook_bool_const_tree_true #define LANG_HOOKS_EH_USE_CXA_END_CLEANUPfalse #define LANG_HOOKS_DEEP_UNSHARINGfalse
Re: [PATCH] cleanups in line-map
Manuel López-Ibáñez lopeziba...@gmail.com writes: A few cleanups in line-map code. Bootstrapped and regression tested on x86_64-linux-gnu. Thanks for doing this. OK? Yes, barring this little nit: [...] Index: libcpp/line-map.c === --- libcpp/line-map.c (revision 216098) +++ libcpp/line-map.c (working copy) @@ -29,12 +29,10 @@ along with this program; see the file CO static void trace_include (const struct line_maps *, const struct line_map *); static const struct line_map * linemap_ordinary_map_lookup (struct line_maps *, source_location); static const struct line_map* linemap_macro_map_lookup (struct line_maps *, source_location); -static source_location linemap_macro_map_loc_to_def_point -(const struct line_map*, source_location); This is not redundant per se, is it? It's just a forward declaration of the function that is defined later. Just like for linemap_macro_map_loc_unwind_toward_spelling() below. Or what am I missing? I'd prefer to see this forward declaration stay, FWIW. Otherwise, this cleanup patch looks good to me. If it was my call, I'd say OK with that change. Thank you for tackling this. -- Dodji
Re: [v3, patch] Move std::list::_M_size closer to iterators
On 12/10/14 21:41 +0200, Marc Glisse wrote: Hello, this patch moves _M_size in std::list to a place where it should be easier (no offsetof) to access from an iterator (it doesn't matter when you access it from the main std::list object). It does not take advantage of it yet, that can be done at any time, whereas the layout will soon be fixed. This triggered one of the issues listed in PR 63345 (casting to a node* even for the sentinel), which I am fixing here. Nice, OK for trunk, thanks.
Re: [PATCH] cleanups in line-map
On 13 October 2014 10:52, Dodji Seketeli do...@redhat.com wrote: Manuel López-Ibáñez lopeziba...@gmail.com writes: Index: libcpp/line-map.c === --- libcpp/line-map.c (revision 216098) +++ libcpp/line-map.c (working copy) @@ -29,12 +29,10 @@ along with this program; see the file CO static void trace_include (const struct line_maps *, const struct line_map *); static const struct line_map * linemap_ordinary_map_lookup (struct line_maps *, source_location); static const struct line_map* linemap_macro_map_lookup (struct line_maps *, source_location); -static source_location linemap_macro_map_loc_to_def_point -(const struct line_map*, source_location); This is not redundant per se, is it? It's just a forward declaration of the function that is defined later. Just like for linemap_macro_map_loc_unwind_toward_spelling() below. Or what am I missing? I'd prefer to see this forward declaration stay, FWIW. Oh, well, I guess it is a matter of taste. I was annoyed by having to update two different places (I added const to the first argument of this definition function, so I will have to also add it here). Moreover, as the patch shows, the two declarations might be different (one was static, the other not), and then which one is the correct one requires some expert knowledge of C++. I understand using forward declarations when otherwise it would be a mess to re-order the functions, but in this case, it is not really necessary. But I can leave it and just update the argument type. Otherwise, this cleanup patch looks good to me. If it was my call, I'd say OK with that change. Thank you for tackling this. Thanks for the review. Cheers, Manuel.
Re: [PATCH,1/2] Extended if-conversion for loops marked with pragma omp simd.
Richard, Here is updated patch (part1) for extended if conversion. Second part of patch will be sent later. Changelog. 2014-10-13 Yuri Rumyantsev ysrum...@gmail.com * tree-if-conv.c (cgraph.h): Add include file to detect function clone. (flag_force_vectorize): New variable. (edge_predicate): New function. (set_edge_predicate): New function. (add_to_predicate_list): Check unconditionally that bb is always executed to early exit. Use predicate of cd-equivalent block for join blocks if it exists. (add_to_dst_predicate_list): Invoke add_to_predicate_list if destination block of edge is not always executed. Set-up predicate for critical edge. (if_convertible_phi_p): Accept phi nodes with more than two args if FLAG_FORCE_VECTORIZE was set-up. (ifcvt_can_use_mask_load_store): Use FLAG_FORCE_VECTORIZE. (if_convertible_stmt_p): Fix up pre-function comments. (all_edges_are_critical): New function. (if_convertible_bb_p): Allow bb has more than two predecessors if FLAG_FORCE_VECTORIZE was set-up. Use call of all_edges_are_critical to reject block if-conversion with incoming critical edges only if FLAG_FORCE_VECTORIZE was not set-up. (predicate_bbs): Skip loop exit block also. Add check that if fold_build2 produces bool conversion, recompute predicate using build2_loc. Add zeroing of edge 'aux' field under FLAG_FORCE_VECTORIZE. (if_convertible_loop_p_1): Recompute POST_DOMINATOR tree if FLAG_FORCE_VECTORIZE was set-up to calculate cd equivalent bb's. (find_phi_replacement_condition): Extend function interface: it returns NULL if given phi node must be handled by means of extended phi node predication. If number of predecessors of phi-block is equal 2 and atleast one incoming edge is not critical original algorithm is used. (get_predicate_for_edge): New function. (find_insertion_point): New function. (predicate_arbitrary_scalar_phi): New function. (predicate_all_scalar_phis): Introduce new variable BEFORE. 
Invoke find_insertion_point to initialize gsi and predicate_arbitrary_scalar_phi if TRUE_BB is NULL (it signals that extended predication must be applied). (insert_gimplified_predicates): Add test for non-predicated basic blocks that there are no gimplified statements to insert. Insert predicates at the block beginning for extended if-conversion. (tree_if_conversion): Initialize flag_force_vectorize from current loop or outer loop (to support pragma omp declare). Do loop versioning for innermost loop marked with pragma omp simd and FLAG_TREE_LOOP_IF_CONVERT was not set up. Nullify 'aux' field of edges for blocks with two successors. 2014-09-22 12:28 GMT+04:00 Yuri Rumyantsev ysrum...@gmail.com: Richard, here is reduced patch (part.1) which was reduced almost twice. Let me also answer your comments. 1. I really use edge field 'aux' to keep predicate for critical edges. My previous code was not correct and now it looks like: if (EDGE_COUNT (b->succs) == 1 || EDGE_COUNT (e->dest->preds) == 1) /* Edge E is not critical, use predicate of edge source bb. */ c = bb_predicate (b); else /* Edge E is critical and its aux field contains predicate. */ c = edge_predicate (e); 2. I completely delete all code related to creation of conditional expressions and completely rely on bool pattern recognition in vectorizer. But we need to delete all dead predicate computations which are not used since they prevent vectorization. I will add this local-dce function in next patch. 3. I also did not include in this patch recognition of general phi-nodes with two arguments only for which conversion of conditional scalar reduction can be applied also. Note that all these changes are applied for loop marked with pragma omp simd only. 2014-09-22 Yuri Rumyantsev ysrum...@gmail.com * tree-if-conv.c (cgraph.h): Add include file to detect function clone. (flag_force_vectorize): New variable. (edge_predicate): New function. (set_edge_predicate): New function. (convert_name_to_cmp): New function.
(add_to_predicate_list): Check unconditionally that bb is always executed to early exit. Use predicate of cd-equivalent block for join blocks if it exists. (add_to_dst_predicate_list): Invoke add_to_predicate_list if destination block of edge is not always executed. Set-up predicate for critical edge. (if_convertible_phi_p): Accept phi nodes with more than two args if FLAG_FORCE_VECTORIZE was set-up. (ifcvt_can_use_mask_load_store): Use FLAG_FORCE_VECTORIZE. (if_convertible_stmt_p): Fix up pre-function comments. (all_edges_are_critical): New function. (if_convertible_bb_p): Allow bb has more than two predecessors if FLAG_FORCE_VECTORIZE was set-up. Use call of all_edges_are_critical to reject block if-conversion with incoming critical edges only if FLAG_FORCE_VECTORIZE was not set-up. (predicate_bbs): Skip loop exit block also. Add check that if fold_build2 produces bool conversion, recompute predicate using build2_loc. Add zeroing of edge 'aux' field under
[PATCH, 2/2] Extended if-conversion for loops marked with pragma omp simd.
Richard, Here is second part of patch which includes 1. One particular phi node recognition - if phi function has more than 2 arguments but it has only two really different arguments and one argument has the only occurrence. This is important for conditional scalar reduction conversion, e.g. for such test-case: if (a[i] != 0 && b[i] != 0) n++; 2. New sub-pass which repairs bool pattern candidates with multiple uses - such situation occurs if the same predicate (not simple compare) is used for phi node conversion and load/store mask. If for some var participating in tree traversal its def stmt has multiple uses we create copy of this definition with unique left hand side and change one use of original var to newly created one. We repeat this process until all multiple uses will be deleted. 3. Another sub-pass which deletes redundant predicate computations which are dead locally, i.e. local dead code elimination. Note that such dead code can prevent loop vectorization. Changelog: 2014-10-13 Yuri Rumyantsev ysrum...@gmail.com * tree-if-conv.c (cgraph.h): Add include file to issue error message. (phi_has_two_different_args): New function. (is_cond_scalar_reduction): Add argument EXTENDED to choose access to phi arguments. Invoke phi_has_two_different_args to get phi arguments iff EXTENDED is true. Change check stmt-block is predecessor of phi-block since phi may have more than two arguments. (convert_scalar_cond_reduction): Add argument BEFORE to insert statement before/after gsi point. (predicate_scalar_phi): Add argument false to call of is_cond_scalar_reduction. Add argument true to call of convert_scalar_cond_reduction. (predicate_arbitrary_scalar_phi): Change result of function to tree representing rhs of new phi replacement stmt. (predicate_extended_scalar_phi): New function. (predicate_all_scalar_phis): Invoke predicate_extended_scalar_phi instead of predicate_arbitrary_scalar_phi. (ifcvt_split_def_stmt): New function. (ifcvt_walk_pattern_tree): New function.
(stmt_is_root_of_bool_pattern): New function. (ifcvt_repair_bool_pattern): New function. (ifcvt_local_dce): New function. (tree_if_conversion): Invoke ifcvt_local_dce and ifcvt_repair_bool_pattern under FLAG_FORCE_VECTORIZE. patch.part-2 Description: Binary data
[AArch64] [BE] [2/2] Make large opaque integer modes endianness-safe.
Hi, This is the second patch of the work to fix: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59810 and removes the CANNOT_CHANGE_MODE_CLASS macro, which now permits subregs of vector registers to work correctly on aarch64_be. NOTE: This patch depends upon the following: [AArch64] [BE] [1/2] Make large opaque integer modes endianness-safe. [AArch64] [BE] Fix vector load/stores to not use ld1/st1 Thanks, David. ChangeLog: gcc/: 2014-10-13 David Sherwood david.sherw...@arm.com * config/aarch64/aarch64.h (CANNOT_CHANGE_MODE_CLASS): Removed. * config/aarch64/aarch64.c (aarch64_cannot_change_mode_class): Removed. * config/aarch64/aarch64-protos.h (aarch64_cannot_change_mode_class): Removed. ccmc_v2.patch Description: Binary data
Re: [PATCH 1/2] libstdc++: Add std::align.
On 16/04/14 17:06 +0200, Rüdiger Sonderfeld wrote: C++11 [ptr.align]. This should probably not be inline. But for now this avoids any ABI changes. * libstdc++-v3/testsuite/20_util/align/1.cc: New file. * libstdc++-v3/include/std/memory (align): New function. Fixed (the new function should be in namespace std!) and applied to trunk. Thanks for the patch. Tested x86_64-linux. commit d49cd05620599277845f89325ac8a43622650e8f Author: Jonathan Wakely jwak...@redhat.com Date: Mon Oct 13 10:50:16 2014 +0100 2014-10-13 Rüdiger Sonderfeld ruedi...@c-plusplus.de * include/std/memory (align): Define. * testsuite/20_util/align/1.cc: New. diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory index dafec0c..affc8b1 100644 --- a/libstdc++-v3/include/std/memory +++ b/libstdc++-v3/include/std/memory @@ -87,4 +87,46 @@ # include <backward/auto_ptr.h> #endif +#if __cplusplus >= 201103L +# include <cstdint> +# ifdef _GLIBCXX_USE_C99_STDINT_TR1 +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +/** + * @brief Fit aligned storage in buffer. + * + * [ptr.align] + * + * This function tries to fit @a __size bytes of storage with alignment + * @a __align into the buffer @a __ptr of size @a __space bytes. If such + * a buffer fits then @a __ptr is changed to point to the first byte of the + * aligned storage and @a __space is reduced by the bytes used for alignment. + * + * @param __align A fundamental or extended alignment value. + * @param __size Size of the aligned storage required. + * @param __ptr Pointer to a buffer of @a __space bytes. + * @param __space Size of the buffer pointed to by @a __ptr. + * @return the updated pointer if the aligned storage fits, otherwise nullptr.
+ */ +inline void* +align(size_t __align, size_t __size, void* __ptr, size_t __space) noexcept +{ + const size_t __diff = __align - reinterpret_castuintptr_t(__ptr) % __align; + if (__diff + __size = __space) +return nullptr; + else +{ + __space -= __diff; + __ptr = static_castchar*(__ptr) + __diff; + return __ptr; +} +} + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace +#endif // _GLIBCXX_USE_C99_STDINT_TR1 +#endif // C++11 + #endif /* _GLIBCXX_MEMORY */ diff --git a/libstdc++-v3/testsuite/20_util/align/1.cc b/libstdc++-v3/testsuite/20_util/align/1.cc new file mode 100644 index 000..d1f94e9 --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/align/1.cc @@ -0,0 +1,61 @@ +// { dg-options -std=gnu++11 } + +// 2014-04-16 R??diger Sonderfeld ruedi...@c-plusplus.de + +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// http://www.gnu.org/licenses/. 
+ +// C++11 [ptr.align] (20.6.5): std::align + +#include memory +#include cstdint +#include testsuite_hooks.h + +void +test01() +{ + bool test __attribute__((unused)) = true; + + size_t space = 100; + void* ptr = new char[space]; + char* const orig_ptr = static_castchar*(ptr); + char* old_ptr = orig_ptr; + const size_t orig_space = space; + size_t old_space = space; + const size_t alignment = 16; + const size_t size = 10; + while( void* const r = std::align(alignment, size, ptr, space) ) +{ + VERIFY( r == ptr ); + uintptr_t p = reinterpret_castuintptr_t(ptr); + VERIFY( p % alignment == 0 ); + char* const x = static_castchar*(ptr); + VERIFY( x - old_ptr == old_space - space ); + VERIFY( (void*)x (void*)(orig_ptr + orig_space) ); + VERIFY( (void*)(x + size) (void*)(orig_ptr + orig_space) ); + ptr = x + size; + old_ptr = x; + old_space = space; + space -= size; +} + delete [] orig_ptr; +} + +int main() +{ + test01(); +}
Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling
On Sat, Oct 11, 2014 at 06:49:00PM +0400, Ilya Verbin wrote: This is the last common infrastructure patch in the series. (Next patches will contain tests for libgomp testsuite and MIC specific things) It introduces 2 new options: 1. -foffload=<targets>=<options> By default, GCC will build offload images for all offload targets specified in configure, with non-target-specific options passed to host compiler. This option is used to control offload targets and options for them. It can be used in a few ways: * -foffload=disable Tells GCC to disable offload support. OpenMP target regions will be run in host fallback mode. * -foffload=<targets> Tells GCC to build offload images for <targets>. They will be built with non-target-specific options passed to host compiler. * -foffload=<options> Tells GCC to build offload images for all targets specified in configure. They will be built with non-target-specific options passed to host compiler plus <options>. * -foffload=<targets>=<options> Tells GCC to build offload images for <targets>. They will be built with non-target-specific options passed to host compiler plus <options>. Options specified by -foffload are appended to the end of option set, so in case of option conflicts they have more priority. This looks good to me. 2. -foffload-abi=[lp64|ilp32] This option is supposed to tell mkoffload (and offload compiler) which ABI is used in streamed GIMPLE. This option is desirable, because host and offload compilers must have the same ABI. The option is generated by the host compiler automatically, it should not be specified by user. But I'd like to understand why is this one needed. Why should the compilers care? Aggregates layout and alignment of integral/floating types must match between host and offload compilers, sure, but isn't that something streamed already in the LTO bytecode? Or is LTO streamer not streaming some types like long_type_node? 
I'd expect if host and offload compiler disagree on long type size that you'd just use a different integral type with the same size as long on the host. Different sized pointers are of course a bigger problem, but can't you just error out on that during reading of the LTO, or even handle it (just use some integral type for when is the pointer stored in memory, and just convert to pointer after reads from memory, and convert back before storing to memory). Erroring out during LTO streaming in sounds just fine to me though. Jakub
[PATCHv5][PING] Vimrc config with GNU formatting
On 10/02/2014 09:14 PM, Yury Gribov wrote: On 09/17/2014 09:08 PM, Yury Gribov wrote: On 09/16/2014 08:38 PM, Yury Gribov wrote: Hi all, This is the third version of the patch. A list of changes since last version: * move config to contrib so that it's _not_ enabled by default (current score is 2/1 in favor of no Vim config by default) * update Makefile.in to make .local.vimrc if developer asks for it * disable autoformatting for flex files * fix filtering of non-GNU sources (libsanitizer) * added some small fixes in cinoptions based on feedback from community As noted by Richard, the config does not do a good job of formatting unbound {} blocks e.g. void foo () { int x; { // I'm an example of bad bad formatting } } but it seems to be the best we can get with Vim's cindent (and I don't think anyone seriously considers writing a custom indentexpr). Ok to commit? New version with support for another popular local .vimrc plugin. Hi all, Here is a new version of vimrc patch. Hope I got email settings right this time. Changes since v4: * fixed and enhanced docs * added support for .lvimrc in Makefile * minor fixes in cinoptions and formatoptions (reported by Segher) * removed shiftwidth settings (as it does not really relate to code formatting) -Y commit 3f560e9dd16a5e914b6f2ba82edffe13dfde944c Author: Yury Gribov y.gri...@samsung.com Date: Thu Oct 2 15:50:52 2014 +0400 2014-10-02 Laurynas Biveinis laurynas.bivei...@gmail.com Yury Gribov y.gri...@samsung.com Vim config with GNU formatting. contrib/ * vimrc: New file. / * .gitignore: Added .local.vimrc and .lvimrc. * Makefile.tpl (vimrc, .lvimrc, .local.vimrc): New targets. * Makefile.in: Regenerate. 
diff --git a/.gitignore b/.gitignore index e9b56be..ab97ac6 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,9 @@ POTFILES TAGS TAGS.sub +.local.vimrc +.lvimrc + .gdbinit .gdb_history diff --git a/Makefile.in b/Makefile.in index d6105b3..f3a34af 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2384,6 +2384,18 @@ mail-report-with-warnings.log: warning.log chmod +x $@ echo If you really want to send e-mail, run ./$@ now +# Local Vim config + +$(srcdir)/.local.vimrc: + $(LN_S) $(srcdir)/contrib/vimrc $@ + +$(srcdir)/.lvimrc: + $(LN_S) $(srcdir)/contrib/vimrc $@ + +vimrc: $(srcdir)/.local.vimrc $(srcdir)/.lvimrc + +.PHONY: vimrc + # Installation targets. .PHONY: install uninstall diff --git a/Makefile.tpl b/Makefile.tpl index f7c7e38..b98930c 100644 --- a/Makefile.tpl +++ b/Makefile.tpl @@ -867,6 +867,18 @@ mail-report-with-warnings.log: warning.log chmod +x $@ echo If you really want to send e-mail, run ./$@ now +# Local Vim config + +$(srcdir)/.local.vimrc: + $(LN_S) $(srcdir)/contrib/vimrc $@ + +$(srcdir)/.lvimrc: + $(LN_S) $(srcdir)/contrib/vimrc $@ + +vimrc: $(srcdir)/.local.vimrc $(srcdir)/.lvimrc + +.PHONY: vimrc + # Installation targets. .PHONY: install uninstall diff --git a/contrib/vimrc b/contrib/vimrc new file mode 100644 index 000..34e8f35 --- /dev/null +++ b/contrib/vimrc @@ -0,0 +1,45 @@ + Code formatting settings for Vim. + + To enable this for GCC files by default, you can either source this file + in your .vimrc via autocmd: + :au BufNewFile,BufReadPost path/to/gcc/* :so path/to/gcc/contrib/vimrc + or source the script manually for each newly opened file: + :so contrib/vimrc + You could also use numerous plugins that enable local vimrc e.g. + mbr's localvimrc or thinca's vim-localrc (but note that the latter + is much less secure). To install local vimrc config, run + $ make vimrc + from GCC build folder. + + Copyright (C) 2014 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see http://www.gnu.org/licenses/. + +function! SetStyle() + let l:fname = expand(%:p) + if stridx(l:fname, 'libsanitizer') != -1 +return + endif + let l:ext = fnamemodify(l:fname, :e) + let l:c_exts = ['c', 'h', 'cpp', 'cc', 'C', 'H', 'def', 'java'] + if index(l:c_exts, l:ext) != -1 +setlocal cindent +setlocal softtabstop=2 +setlocal cinoptions=4,n-2,{2,^-2,:2,=2,g0,f0,h2,p4,t0,+2,(0,u0,w1,m0 +setlocal textwidth=80 +setlocal formatoptions-=ro formatoptions+=cqlt + endif +endfunction + +call SetStyle()
Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling
On 13 Oct 12:19, Jakub Jelinek wrote: But I'd like to understand why is this one needed. Why should the compilers care? Aggregates layout and alignment of integral/floating types must match between host and offload compilers, sure, but isn't that something streamed already in the LTO bytecode? Or is LTO streamer not streaming some types like long_type_node? I'd expect if host and offload compiler disagree on long type size that you'd just use a different integral type with the same size as long on the host. Different sized pointers are of course a bigger problem, but can't you just error out on that during reading of the LTO, or even handle it (just use some integral type for when is the pointer stored in memory, and just convert to pointer after reads from memory, and convert back before storing to memory). Erroring out during LTO streaming in sounds just fine to me though. Actually this option was developed by Bernd, so I think the PTX team is going to use it somehow. In MIC's case we're planning just to check in mkoffload that the host and target compilers' ABIs are the same. Without this check we will crash in the LTO streamer with an ICE, so I'd like to issue an error message rather than crash. -- Ilya
Re: Fix libgomp crash without TLS (PR42616)
Now, I wonder on which OS and why does config/tls.m4 CHECK_GCC_TLS actually fail? Can you figure that out? On Android the check passes with --disable-tls (the standard setting when building gcc for Android, as TLS is not supported in bionic) and fails with --enable-tls (i686-linux-android/libgomp/conftest.c:32: undefined reference to `___tls_get_addr'). So, HAVE_TLS is not defined in either case. If we get rid of HAVE_TLS code altogether, we might lose support of some very old OSes, e.g. some Linux distros with a recent gcc and binutils (so that emutls isn't used), but very old glibc (that doesn't support TLS or supports it incorrectly, think of pre-2002 glibc). So, if we get rid of !HAVE_TLS code in libgomp, it would be nice if config/tls.m4 detected it properly and we'd just fail at configure time. How can we check this in config/tls.m4? Can we just combine tests on TLS and emutls? E.g. check whether HAVE_TLS and USE_EMUTLS are both defined. And if we don't, just make sure that on Android, Darwin and/or M$Win (or whatever other OS you had in mind which does support pthreads, but doesn't support native TLS) find out why HAVE_AS_TLS is not defined (guess config.log should explain that). HAVE_AS_TLS is also not defined for Android as it depends on --enable-tls.
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [6/n] Instrument calls and returns
On 10 Oct 12:50, Jeff Law wrote: On 10/08/14 13:04, Ilya Enkovich wrote: Hi, This patch adds intrumentation of calls and returns into instrumentation pass. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_add_bounds_to_ret_stmt): New. (chkp_replace_address_check_builtin): New. (chkp_replace_extract_builtin): New. (chkp_find_bounds_for_elem): New. (chkp_add_bounds_to_call_stmt): New. (chkp_instrument_function): Instrument rets and calls. [ snip ] + /* Additionall we need to add bounds s/Additionall/Additionally/ OK with that nit fixed. jeff Here is a fixed version. Thanks, Ilya -- diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index 12f8f4a..0d4577d1 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -1047,6 +1047,29 @@ chkp_get_registered_bounds (tree ptr) return slot ? *slot : NULL_TREE; } +/* Add bound retvals to return statement pointed by GSI. */ + +static void +chkp_add_bounds_to_ret_stmt (gimple_stmt_iterator *gsi) +{ + gimple ret = gsi_stmt (*gsi); + tree retval = gimple_return_retval (ret); + tree ret_decl = DECL_RESULT (cfun-decl); + tree bounds; + + if (!retval) +return; + + if (BOUNDED_P (ret_decl)) +{ + bounds = chkp_find_bounds (retval, gsi); + bounds = chkp_maybe_copy_and_register_bounds (ret_decl, bounds); + gimple_return_set_retbnd (ret, bounds); +} + + update_stmt (ret); +} + /* Force OP to be suitable for using as an argument for call. New statements (if any) go to SEQ. */ static tree @@ -1169,6 +1192,64 @@ chkp_check_mem_access (tree first, tree last, tree bounds, chkp_check_upper (last, bounds, iter, location, dirflag); } +/* Replace call to _bnd_chk_* pointed by GSI with + bndcu and bndcl calls. DIRFLAG determines whether + check is for read or write. 
*/ + +void +chkp_replace_address_check_builtin (gimple_stmt_iterator *gsi, + tree dirflag) +{ + gimple_stmt_iterator call_iter = *gsi; + gimple call = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (call); + tree addr = gimple_call_arg (call, 0); + tree bounds = chkp_find_bounds (addr, gsi); + + if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_LBOUNDS + || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_BOUNDS) +chkp_check_lower (addr, bounds, *gsi, gimple_location (call), dirflag); + + if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_UBOUNDS) +chkp_check_upper (addr, bounds, *gsi, gimple_location (call), dirflag); + + if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_CHECK_PTR_BOUNDS) +{ + tree size = gimple_call_arg (call, 1); + addr = fold_build_pointer_plus (addr, size); + addr = fold_build_pointer_plus_hwi (addr, -1); + chkp_check_upper (addr, bounds, *gsi, gimple_location (call), dirflag); +} + + gsi_remove (call_iter, true); +} + +/* Replace call to _bnd_get_ptr_* pointed by GSI with + corresponding bounds extract call. */ + +void +chkp_replace_extract_builtin (gimple_stmt_iterator *gsi) +{ + gimple call = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (call); + tree addr = gimple_call_arg (call, 0); + tree bounds = chkp_find_bounds (addr, gsi); + gimple extract; + + if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_GET_PTR_LBOUND) +fndecl = chkp_extract_lower_fndecl; + else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CHKP_GET_PTR_UBOUND) +fndecl = chkp_extract_upper_fndecl; + else +gcc_unreachable (); + + extract = gimple_build_call (fndecl, 1, bounds); + gimple_call_set_lhs (extract, gimple_call_lhs (call)); + chkp_mark_stmt (extract); + + gsi_replace (gsi, extract, false); +} + /* Return COMPONENT_REF accessing FIELD in OBJ. */ static tree chkp_build_component_ref (tree obj, tree field) @@ -1247,6 +1328,82 @@ chkp_can_be_shared (tree t) return false; } +/* Helper function for chkp_add_bounds_to_call_stmt. 
+ Fill ALL_BOUNDS output array with created bounds. + + OFFS is used for recursive calls and holds basic + offset of TYPE in outer structure in bits. + + ITER points a position where bounds are searched. + + ALL_BOUNDS[i] is filled with elem bounds if there + is a field in TYPE which has pointer type and offset + equal to i * POINTER_SIZE in bits. */ +static void +chkp_find_bounds_for_elem (tree elem, tree *all_bounds, + HOST_WIDE_INT offs, + gimple_stmt_iterator *iter) +{ + tree type = TREE_TYPE (elem); + + if (BOUNDED_TYPE_P (type)) +{ + if (!all_bounds[offs / POINTER_SIZE]) + { + tree temp = make_temp_ssa_name (type, gimple_build_nop (), ); + gimple assign = gimple_build_assign (temp, elem); + gimple_stmt_iterator gsi; + + gsi_insert_before (iter, assign, GSI_SAME_STMT); + gsi = gsi_for_stmt (assign); + + all_bounds[offs / POINTER_SIZE] = chkp_find_bounds (temp,
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [7/n] Instrument params
On 10 Oct 12:10, Jeff Law wrote: On 10/08/14 13:06, Ilya Enkovich wrote: Hi, This patch adds bounds initialization for address taken input arguments. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_instrument_function): Store bounds for address taken args. diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index 6bbceb0..5443950 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -3758,6 +3758,51 @@ chkp_instrument_function (void) bb = next; } while (bb); + + /* Some input params may have bounds and be address taken. In this case + we should store incomping bounds into bounds table. */ s/incomping/incoming/ With that nit fixed this is OK. jeff Here is a fixed version. Thanks, Ilya -- diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index 0d4577d1..53b3386 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -3758,6 +3758,51 @@ chkp_instrument_function (void) bb = next; } while (bb); + + /* Some input params may have bounds and be address taken. In this case + we should store incoming bounds into bounds table. */ + tree arg; + if (flag_chkp_store_bounds) +for (arg = DECL_ARGUMENTS (cfun-decl); arg; arg = DECL_CHAIN (arg)) + if (TREE_ADDRESSABLE (arg)) + { + if (BOUNDED_P (arg)) + { + tree bounds = chkp_get_next_bounds_parm (arg); + tree def_ptr = ssa_default_def (cfun, arg); + gimple_stmt_iterator iter + = gsi_start_bb (chkp_get_entry_block ()); + chkp_build_bndstx (chkp_build_addr_expr (arg), +def_ptr ? def_ptr : arg, +bounds, iter); + + /* Skip bounds arg. 
*/ + arg = TREE_CHAIN (arg); + } + else if (chkp_type_has_pointer (TREE_TYPE (arg))) + { + tree orig_arg = arg; + bitmap slots = chkp_find_bound_slots (TREE_TYPE (arg)); + gimple_stmt_iterator iter + = gsi_start_bb (chkp_get_entry_block ()); + bitmap_iterator bi; + unsigned bnd_no; + + EXECUTE_IF_SET_IN_BITMAP (slots, 0, bnd_no, bi) + { + tree bounds = chkp_get_next_bounds_parm (arg); + HOST_WIDE_INT offs = bnd_no * POINTER_SIZE / BITS_PER_UNIT; + tree addr = chkp_build_addr_expr (orig_arg); + tree ptr = build2 (MEM_REF, ptr_type_node, addr, +build_int_cst (ptr_type_node, offs)); + chkp_build_bndstx (chkp_build_addr_expr (ptr), ptr, +bounds, iter); + + arg = DECL_CHAIN (arg); + } + BITMAP_FREE (slots); + } + } } /* Initialize pass. */
Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly
I need to collect data from Haswell, but the patch should not help it's performance much, just increase code size. On Mon, Oct 13, 2014 at 12:01 PM, Richard Biener richard.guent...@gmail.com wrote: On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Hi, The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. Bootstrap and make check are in progress. The patch boosts (up to 2,5 times improve) several benchmarks compiled with -Ofast on Silvermont Spec2000: +5% gain on 173.applu +1% gain on 255.vortex Is it ok for trunk when pass bootstrap and make check? This is only a 20% increase - from 100 to 120. I would instead suggest to explore doing this change unconditionally if it helps that much. Richard. Thanks, Evgeny 2014-10-10 Evgeny Stupachenko evstu...@gmail.com * config/i386/i386.c (ix86_option_override_internal): Increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates CPUs with high branch cost. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6337aa5..5ac10eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, opts-x_param_values, opts_set-x_param_values); + /* Extend full peel max insns parameter for CPUs with high branch cost. */ + if (TARGET_HIGH_BRANCH_COST) +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, + 120, + opts-x_param_values, + opts_set-x_param_values); + + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
*/ if (opts-x_flag_prefetch_loop_arrays 0 HAVE_prefetch diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2c64162..da0c57b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] #define TARGET_SCHEDULEix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index b6b210e..04d8bf8 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit, m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | m_ATHLON_K8 | m_AMDFAM10) +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This could be + used to tune unroll, if-cvt, inline... heuristics. */ +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost, + m_BONNELL | m_SILVERMONT | m_INTEL) + /*/ /* Integer instruction selection tuning */ /*/
Re: [PATCH] cleanup in c-parser
I updated the patch with a comment. Actually, Manuel handed me this patch just to help me get familiar with the process of submitting and testing. Generating this one with git diff looks different so I'm not sure if that's a problem or not. Thanks, Anthony On Sun, Oct 12, 2014 at 10:09 PM, pins...@gmail.com wrote: On Oct 12, 2014, at 12:37 PM, Anthony Brandon anthony.bran...@gmail.com wrote: Hi, I'm a new contributor and I don't yet have a copyright assignment or commit access. Thanks for you contribution. Your new function is missing a comment before it saying what it does. Yes it might be obvious what the function does but the coding style requires it. Thanks, Andrew This is a cleanup of code duplication in c-parser. I bootstrapped and tested on x86_64-linux. gcc/c/ChangeLog: 2014-10-12 Anthony Brandon anthony.bran...@gmail.com * c-parser.c (c_parser_all_labels): New function to replace the duplicate code. (c_parser_statement): Call the new function. cleanup.diff -- Anthony diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 0d159fd..346448a 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -4654,6 +4654,18 @@ c_parser_compound_statement_nostart (c_parser *parser) mark_valid_location_for_stdc_pragma (save_valid_for_pragma); } +/* Parse all consecutive labels. */ + +static void +c_parser_all_labels (c_parser *parser) +{ + while (c_parser_next_token_is_keyword (parser, RID_CASE) +|| c_parser_next_token_is_keyword (parser, RID_DEFAULT) +|| (c_parser_next_token_is (parser, CPP_NAME) + c_parser_peek_2nd_token (parser)-type == CPP_COLON)) +c_parser_label (parser); +} + /* Parse a label (C90 6.6.1, C99 6.8.1). 
label: @@ -4854,11 +4866,7 @@ c_parser_label (c_parser *parser) static void c_parser_statement (c_parser *parser) { - while (c_parser_next_token_is_keyword (parser, RID_CASE) -|| c_parser_next_token_is_keyword (parser, RID_DEFAULT) -|| (c_parser_next_token_is (parser, CPP_NAME) - c_parser_peek_2nd_token (parser)-type == CPP_COLON)) -c_parser_label (parser); + c_parser_all_labels (parser); c_parser_statement_after_labels (parser); } @@ -5090,11 +5098,7 @@ c_parser_if_body (c_parser *parser, bool *if_p) { tree block = c_begin_compound_stmt (flag_isoc99); location_t body_loc = c_parser_peek_token (parser)-location; - while (c_parser_next_token_is_keyword (parser, RID_CASE) -|| c_parser_next_token_is_keyword (parser, RID_DEFAULT) -|| (c_parser_next_token_is (parser, CPP_NAME) - c_parser_peek_2nd_token (parser)-type == CPP_COLON)) -c_parser_label (parser); + c_parser_all_labels (parser); *if_p = c_parser_next_token_is_keyword (parser, RID_IF); if (c_parser_next_token_is (parser, CPP_SEMICOLON)) { @@ -5121,11 +5125,7 @@ c_parser_else_body (c_parser *parser) { location_t else_loc = c_parser_peek_token (parser)-location; tree block = c_begin_compound_stmt (flag_isoc99); - while (c_parser_next_token_is_keyword (parser, RID_CASE) -|| c_parser_next_token_is_keyword (parser, RID_DEFAULT) -|| (c_parser_next_token_is (parser, CPP_NAME) - c_parser_peek_2nd_token (parser)-type == CPP_COLON)) -c_parser_label (parser); + c_parser_all_labels (parser); if (c_parser_next_token_is (parser, CPP_SEMICOLON)) { location_t loc = c_parser_peek_token (parser)-location;
Re: [PATCH 1/2] libstdc++: Add std::align.
On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote: On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote: Fixed (the new function should be in namespace std!) and applied to trunk. Thanks for the patch. Thanks for merging and improving my patches! Sorry it's taken so long, I needed to get some of the ABI transition work done first. If you have the time maybe you could also look at the put_time patch. Unlike get_time it does not require an ABI change. OK great. I also revisited your codecvt proposal, but I don't think you ever sent a patch, only the RFC about the design. Do you have a finished patch?
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [10/n] Stores handler
2014-10-09 22:51 GMT+04:00 Jeff Law l...@redhat.com: On 10/08/14 13:12, Ilya Enkovich wrote: Hi, This patch adds an assignment processing function which is used by inliner for newly generated stores. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_copy_bounds_for_assign): New. * tree-chkp.h (chkp_copy_bounds_for_assign): New. This probably should have been part of the inliner submission since that's the only place it's used and one needs the inliner context to know how this function is going to be used. Presumably the reason it's not in tree-inline and static is you want to utilize chkp_walk_pointer_assignments? The code is fine, just want to make sure it goes into a logical place. Jeff I have to export either chkp_copy_bounds_for_assign or chkp_walk_pointer_assignments with chkp_copy_bounds_for_elem. Not much difference but I'd prefer to keep all memrefs processing code in tree-chkp.c. Ilya
Re: [libstdc++] Add xmethods for array, deque, forward_list, list and vector.
On 12/10/14 06:49 -0700, Siva Chandra wrote: Hello, Attached is a patch which adds xmethods for std::array, std::deque, std::forward_list, std::list and std::vector. There were already a couple of xmethods for std::vector, but this patch adds more on top of them. Committed - thanks!
Re: [PATCH 2/3] libstdc++: Add put_time support.
On 15/04/14 23:20 +0200, Rüdiger Sonderfeld wrote: Described in [ext.manip]. * libstdc++-v3/include/std/iomanip (_Put_time): New struct. (put_time): New manipulator. (operator<<): New overloaded function. * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/1.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/1.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/2.cc: New file. The 27_io/manipulators/extended/put_time/char/2.cc and 27_io/manipulators/extended/put_time/wchar_t/2.cc tests fail for me. i2.exe: /home/jwakely/src/gcc/gcc/libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:41: void test01(): Assertion `oss.str() == "Son 1971"' failed. FAIL: 27_io/manipulators/extended/put_time/char/2.cc execution test
Re: [PATCH 1/2] libstdc++: Add std::align.
On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote: On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote: Fixed (the new function should be in namespace std!) and applied to trunk. Thanks for the patch. Thanks for merging and improving my patches! If you have the time maybe you could also look at the put_time patch. Unlike get_time it does not require an ABI change. I was about to close https://gcc.gnu.org/PR57350 as fixed, but the test in the SO question it links to fails with your implementation that I've just committed: https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign
Re: [libstdc++] Add xmethods for array, deque, forward_list, list and vector.
On Mon, Oct 13, 2014 at 4:23 AM, Jonathan Wakely jwak...@redhat.com wrote: On 12/10/14 06:49 -0700, Siva Chandra wrote: Hello, Attached is a patch which adds xmethods for std::array, std::deque, std::forward_list, std::list and std::vector. There were already a couple of xmethods for std::vector, but this patch adds more on top of them. Committed - thanks! That was fast. Thanks a lot! - Siva Chandra
Re: [PATCH x86] Increase PARAM_MAX_COMPLETELY_PEELED_INSNS when branch is costly
On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Hi, The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. Bootstrap and make check are in progress. The patch boosts (up to 2,5 times improve) several benchmarks compiled with -Ofast on Silvermont Spec2000: +5% gain on 173.applu +1% gain on 255.vortex Is it ok for trunk when pass bootstrap and make check? This is only a 20% increase - from 100 to 120. I would instead suggest to explore doing this change unconditionally if it helps that much. Agreed, I think the value of 100 was set decade ago by Zdenek and me completely artifically. I do not recall any serious tuning of this flag. Note that I plan to update https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than tree changing its meaning somewhat. Perhaps I could try to find time this or next week to update the patch so we do not need to do the tuning twice. Honza Richard. Thanks, Evgeny 2014-10-10 Evgeny Stupachenko evstu...@gmail.com * config/i386/i386.c (ix86_option_override_internal): Increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates CPUs with high branch cost. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6337aa5..5ac10eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, opts-x_param_values, opts_set-x_param_values); + /* Extend full peel max insns parameter for CPUs with high branch cost. */ + if (TARGET_HIGH_BRANCH_COST) +maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, + 120, + opts-x_param_values, + opts_set-x_param_values); + + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
*/ if (opts-x_flag_prefetch_loop_arrays 0 HAVE_prefetch diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2c64162..da0c57b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index b6b210e..04d8bf8 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, four_jump_limit, m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | m_ATHLON_K8 | m_AMDFAM10) +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This could be + used to tune unroll, if-cvt, inline... heuristics. */ +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, high_branch_cost, + m_BONNELL | m_SILVERMONT | m_INTEL) + /*/ /* Integer instruction selection tuning */ /*/
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [9/n] Static constructors
On 09 Oct 13:04, Jeff Law wrote: On 10/08/14 13:10, Ilya Enkovich wrote: Hi, This patch introduces functions to handle static pointers and static bounds. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New. (chkp_ctor_stmt_list): New. (chkp_register_var_initializer): New. (chkp_add_modification_to_stmt_list): New. (chkp_output_static_bounds): New. (chkp_finish_file): New. (chkp_instrument_function): Remove useless statements from static bounds constructors. * tree-chkp.h (chkp_register_var_initializer): New. (chkp_finish_file): New. Thanks for clarifying on the testcase. I misunderstood the testing methodology and hence the results made no sense to me :-) Make the maximum statements a PARAM diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index b424af8..4b5a773 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -394,6 +394,27 @@ static bool in_chkp_pass; #define CHKP_ZERO_BOUNDS_VAR_NAME __chkp_zero_bounds #define CHKP_NONE_BOUNDS_VAR_NAME __chkp_none_bounds +/* Static checker constructors may become very large and their + compilation with optimization may take too much time. + Therefore we put a limit to number of statements in one + construcor. Tests with 100 000 statically initialized s/construcor/constructor/ + static bounds initilization. If VAR is added into + bounds initlization list then 1 is returned. Otherwise s/initilization/initialization/ + into list of static initilizer statementes (passed in ARG). s/initilizer/initializer/ This will be fine with the change to a PARAM and the nit spelling stuff fixed. jeff Thanks for review! Here is a fixed version. Ilya -- 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New. (chkp_ctor_stmt_list): New. (chkp_register_var_initializer): New. (chkp_add_modification_to_stmt_list): New. (chkp_output_static_bounds): New. (chkp_finish_file): New. 
(chkp_instrument_function): Remove useless statements from static bounds constructors. * tree-chkp.h (chkp_register_var_initializer): New. (chkp_finish_file): New. * doc/invoke.texi (chkp-max-ctor-size): New. * params.def (PARAM_CHKP_MAX_CTOR_SIZE): New. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1d8ab03..8128dff 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10459,6 +10459,12 @@ is greater or equal to this number, use callbacks instead of inline checks. E.g. to disable inline code use @option{--param asan-instrumentation-with-call-threshold=0}. +@item chkp-max-ctor-size +Static constructors generated by Pointer Bounds Checker may become very +large and significantly increase compile time at optimization level +@option{-O1} and higher. This parameter is a maximum nubmer of statements +in a single generated constructor. Default value is 5000. + @end table @end table diff --git a/gcc/params.def b/gcc/params.def index aefdd07..af490e0 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1099,6 +1099,12 @@ DEFPARAM (PARAM_UNINIT_CONTROL_DEP_ATTEMPTS, Maximum number of nested calls to search for control dependencies during uninitialized variable analysis, 1000, 1, 0) + +DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE, + chkp-max-ctor-size, + Maximum number of statements to be included into a single static + constructor generated by Pointer Bounds Checker, + 5000, 100, 0) /* Local variables: diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index 0abe192..21c6138 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -65,6 +65,7 @@ along with GCC; see the file COPYING3. If not see #include rtl.h /* For MEM_P, assign_temp. */ #include tree-dfa.h #include ipa-chkp.h +#include params.h /* Pointer Bounds Checker instruments code with memory checks to find out-of-bounds memory accesses. 
Checks are performed by computing @@ -394,6 +395,27 @@ static bool in_chkp_pass; #define CHKP_ZERO_BOUNDS_VAR_NAME __chkp_zero_bounds #define CHKP_NONE_BOUNDS_VAR_NAME __chkp_none_bounds +/* Static checker constructors may become very large and their + compilation with optimization may take too much time. + Therefore we put a limit to number of statements in one + constructor. Tests with 100 000 statically initialized + pointers showed following compilation times on Sandy Bridge + server (used -O2): + limit100 = ~18 sec. + limit300 = ~22 sec. + limit 1000 = ~30 sec. + limit 3000 = ~49 sec. + limit 5000 = ~55 sec. + limit 1 = ~76 sec. + limit 10 = ~532 sec. */ +#define MAX_STMTS_IN_STATIC_CHKP_CTOR (PARAM_VALUE (PARAM_CHKP_MAX_CTOR_SIZE)) + +struct chkp_ctor_stmt_list +{ + tree stmts; + int avail; +}; + /* Return 1 if function FNDECL is
Fix bootstrap/63496 (pasto in ipa-polymorphic-call-context.c)
Hi, this patch fixes pasto in ipa-polymorphic-call.c. Of course I was not intending to compare offset+64 with offset ;) Honza PR bootstrap/63496 * ipa-polymorphic-call.c (extr_type_from_vtbl_ptr_store): Fix pasto. Index: ipa-polymorphic-call.c === --- ipa-polymorphic-call.c (revision 216141) +++ ipa-polymorphic-call.c (working copy) @@ -1235,7 +1235,7 @@ extr_type_from_vtbl_ptr_store (gimple st if (dump_file) fprintf (dump_file, wrong offset %i!=%i or size %i\n, (int)offset, (int)tci-offset, (int)size); - return offset + GET_MODE_BITSIZE (Pmode) = offset + return offset + GET_MODE_BITSIZE (Pmode) = tci-offset || (max_size != -1 tci-offset + GET_MODE_BITSIZE (Pmode) offset + max_size) ? error_mark_node : NULL;
Re: [PATCH 1/2] libstdc++: Add std::align.
On 13/10/14 13:10 +0100, Jonathan Wakely wrote: On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote: On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote: Fixed (the new function should be in namespace std!) and applied to trunk. Thanks for the patch. Thanks for merging and improving my patches! If you have the time maybe you could also look at the put_time patch. Unlike get_time it does not require an ABI change. I was about to close https://gcc.gnu.org/PR57350 as fixed, but the test in the SO question it links to fails with your implementation that I've just committed: https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign I'm testing this fix. commit 60aa345d8122053f7c8ba7c743b458e799eb1455 Author: Jonathan Wakely jwak...@redhat.com Date: Mon Oct 13 14:02:39 2014 +0100 PR libstdc++/57350 * include/std/memory (align): Do not adjust correctly aligned address. * testsuite/20_util/align/2.cc: New. diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory index affc8b1..b5792ad 100644 --- a/libstdc++-v3/include/std/memory +++ b/libstdc++-v3/include/std/memory @@ -113,14 +113,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION inline void* align(size_t __align, size_t __size, void* __ptr, size_t __space) noexcept { - const size_t __diff = __align - reinterpret_castuintptr_t(__ptr) % __align; - if (__diff + __size = __space) + const auto __intptr = reinterpret_castuintptr_t(__ptr); + const auto __aligned = (__intptr - 1u + __align) -__align; + const auto __diff = __aligned - __intptr; + if ((__size + __diff) __space) return nullptr; else { __space -= __diff; - __ptr = static_castchar*(__ptr) + __diff; - return __ptr; + return __ptr = reinterpret_castvoid*(__aligned); } } diff --git a/libstdc++-v3/testsuite/20_util/align/2.cc b/libstdc++-v3/testsuite/20_util/align/2.cc new file mode 100644 index 000..efad56a --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/align/2.cc @@ -0,0 +1,42 @@ +// { dg-options -std=gnu++11 } + +// Copyright (C) 2014 Free 
Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// http://www.gnu.org/licenses/. + +// C++11 [ptr.align] (20.6.5): std::align + +#include memory +#include testsuite_hooks.h + +void +test01() +{ + bool test __attribute__((unused)) = true; + + int i = 0; + void* ptr = i; + auto space = sizeof(i); + auto p2 = std::align(alignof(i), space, ptr, space); + VERIFY( ptr == i ); + VERIFY( p2 == i ); + VERIFY(space == sizeof(i)); +} + +int main() +{ + test01(); +}
Re: [PATCH 3/5] IPA ICF pass
On 10/11/2014 10:19 AM, Jan Hubicka wrote: After few days of measurement and tuning, I was able to get numbers to the following shape: Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 1412 kB ( 0%) ggc phase opt and generate : 27.83 (59%) usr 0.66 (19%) sys 28.52 (37%) wall 1028813 kB (24%) ggc phase stream in : 16.90 (36%) usr 0.63 (18%) sys 17.60 (23%) wall 3246453 kB (76%) ggc phase stream out: 2.76 ( 6%) usr 2.19 (63%) sys 31.34 (40%) wall 2 kB ( 0%) ggc callgraph optimization : 0.36 ( 1%) usr 0.00 ( 0%) sys 0.35 ( 0%) wall 40 kB ( 0%) ggc ipa dead code removal : 3.31 ( 7%) usr 0.01 ( 0%) sys 3.25 ( 4%) wall 0 kB ( 0%) ggc ipa virtual call target : 3.69 ( 8%) usr 0.03 ( 1%) sys 3.80 ( 5%) wall 21 kB ( 0%) ggc ipa devirtualization: 0.12 ( 0%) usr 0.00 ( 0%) sys 0.15 ( 0%) wall 13704 kB ( 0%) ggc ipa cp : 1.11 ( 2%) usr 0.07 ( 2%) sys 1.17 ( 2%) wall 188558 kB ( 4%) ggc ipa inlining heuristics : 8.17 (17%) usr 0.14 ( 4%) sys 8.27 (11%) wall 494738 kB (12%) ggc ipa comdats : 0.12 ( 0%) usr 0.00 ( 0%) sys 0.12 ( 0%) wall 0 kB ( 0%) ggc ipa lto gimple in : 1.86 ( 4%) usr 0.40 (11%) sys 2.20 ( 3%) wall 537970 kB (13%) ggc ipa lto gimple out : 0.19 ( 0%) usr 0.08 ( 2%) sys 0.27 ( 0%) wall 2 kB ( 0%) ggc ipa lto decl in : 12.20 (26%) usr 0.37 (11%) sys 12.64 (16%) wall 2441687 kB (57%) ggc ipa lto decl out: 2.51 ( 5%) usr 0.21 ( 6%) sys 2.71 ( 3%) wall 0 kB ( 0%) ggc ipa lto constructors in : 0.13 ( 0%) usr 0.02 ( 1%) sys 0.17 ( 0%) wall 15692 kB ( 0%) ggc ipa lto constructors out: 0.03 ( 0%) usr 0.00 ( 0%) sys 0.03 ( 0%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 0.54 ( 1%) usr 0.09 ( 3%) sys 0.63 ( 1%) wall 407182 kB (10%) ggc ipa lto decl merge : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.34 ( 2%) wall 8220 kB ( 0%) ggc ipa lto cgraph merge: 1.00 ( 2%) usr 0.00 ( 0%) sys 1.00 ( 1%) wall 14605 kB ( 0%) ggc whopr wpa : 0.92 ( 2%) usr 0.00 ( 0%) sys 0.89 ( 1%) wall 1 kB ( 0%) ggc whopr wpa I/O : 0.01 ( 0%) usr 1.90 (55%) sys 28.31 (37%) wall 0 
kB ( 0%) ggc whopr partitioning : 2.81 ( 6%) usr 0.01 ( 0%) sys 2.83 ( 4%) wall 4943 kB ( 0%) ggc ipa reference : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.35 ( 2%) wall 0 kB ( 0%) ggc ipa profile : 0.20 ( 0%) usr 0.01 ( 0%) sys 0.21 ( 0%) wall 0 kB ( 0%) ggc ipa pure const : 1.62 ( 3%) usr 0.00 ( 0%) sys 1.63 ( 2%) wall 0 kB ( 0%) ggc ipa icf : 2.65 ( 6%) usr 0.02 ( 1%) sys 2.68 ( 3%) wall 1352 kB ( 0%) ggc inline parameters : 0.00 ( 0%) usr 0.01 ( 0%) sys 0.00 ( 0%) wall 0 kB ( 0%) ggc tree SSA rewrite: 0.11 ( 0%) usr 0.01 ( 0%) sys 0.08 ( 0%) wall 18919 kB ( 0%) ggc tree SSA other : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc tree SSA incremental: 0.24 ( 1%) usr 0.01 ( 0%) sys 0.32 ( 0%) wall 11325 kB ( 0%) ggc tree operand scan : 0.15 ( 0%) usr 0.02 ( 1%) sys 0.18 ( 0%) wall 116283 kB ( 3%) ggc dominance frontiers : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.02 ( 0%) wall 0 kB ( 0%) ggc dominance computation : 0.13 ( 0%) usr 0.01 ( 0%) sys 0.16 ( 0%) wall 0 kB ( 0%) ggc varconst: 0.01 ( 0%) usr 0.02 ( 1%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc loop fini : 0.02 ( 0%) usr 0.00 ( 0%) sys 0.04 ( 0%) wall 0 kB ( 0%) ggc unaccounted todo: 0.55 ( 1%) usr 0.00 ( 0%) sys 0.56 ( 1%) wall 0 kB ( 0%) ggc TOTAL : 47.49 3.4877.46 4276682 kB and I was able to reduce function bodies loaded in WPA to 35% (from previous 55%). The main problem 35% means that 35% of all function bodies are compared with something else? That feels pretty high. but overall numbers are not so terrible. Currently, the pass is able to merge 32K functions. As you know, we group functions to so called classes. According to stats, average non-singular class size contains at the end of comparison 7.39 candidates and we have 5K such functions. Because we load body for each candidate in such groups, it gives us minimum number of loaded bodies: 37K. As we load 70K function, we have still place to improve. But I guess WPA body-less comparison is quite efficient. 
with speed was hidden in work list for congruence classes, where hash_set was used. I chose the data structure to support delete operation, but it was really slow. Thus, hash_set was replaced with linked list and a flag is used to identify if a set is
Re: [RFC: Patch, PR 60102] [4.9/4.10 Regression] powerpc fp-bit ices@dwf_regno
Maciej W. Rozycki wrote: On Thu, 9 Oct 2014, Maciej W. Rozycki wrote: Seeing Rohit got good results it has struck me that perhaps one of the patches I had previously reverted, to be able to compile GCC in the first place, interfered with this fix -- I backed out all the subsequent patches to test yours and Rohit's by themselves only. And it was actually the case, with this change: 2013-05-21 Christian Bruel christian.br...@st.com * dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for spanning registers. LEAF_REG_REMAP is supported only for contiguous registers. Set register size out of the PARALLEL loop. back in place, in addition to your fix, I get an all-passed score for gdb.base/store.exp. So your change looks good and my decision to back out the other patches unfortunate. I'll yet run full e500v2 testing now to double check, and let you know what the results are, within a couple of hours if things work well. It took a bit more because I saw some regressions that I wanted to investigate. In the end they turned out intermittent and the failures happen sometimes whether your change is applied or not. So I'm fine with your change, thanks for your work and patience. Thanks for verifying! David, is the patch OK to commit now? Bye, Ulrich -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain ulrich.weig...@de.ibm.com
Re: [PATCH 3/5] IPA ICF pass
35% means that 35% of all function bodies are compared with something else? That feels pretty high. but overall numbers are not so terrible. Currently, the pass is able to merge 32K functions. As you know, we group functions to so called classes. According to stats, average non-singular class size contains at the end of comparison 7.39 candidates and we have 5K such functions. Because we load body for each candidate in such groups, it gives us minimum number of loaded bodies: 37K. As we load 70K function, we have still place to improve. But I guess WPA body-less comparison is quite efficient. OK, that seems reasonable. with speed was hidden in work list for congruence classes, where hash_set was used. I chose the data structure to support delete operation, but it was really slow. Thus, hash_set was replaced with linked list and a flag is used to identify if a set is removed or not. Interesting, I would not expect bottleneck in a congruence solving :) The problem was just the hash_set that showed to be slow data structure for a set of operations needed in congruence solving. I have no clue how complicated it can be to implement release_body function to an operation that really releases the memory? I suppose one can keep the caches from streamer and free trees read. Freeing gimple statements, cfg should be relatively easy. Let's however first try to tune the implementation rather than trying to get this hack implemented. Explicit ggc_free calls traditionally tended to cause some negative reactions wrt memory fragmentation concerns. Agree with suggested approach. In future we actually may keep the duplicated functions in WPA memory and use corresponding body whenever the function is inlined to avoid disturbing debug info more than needed. Honza Markus' problem with -fprofile-use has been removed, IPA-ICF is preceding devirtualization pass. I hope it is fine? Yes, I think devirtualization should actually work better with identical virtual methods merged.
We just need to be sure it sees through the newly introduced aliases (there should be no thunks for virtual methods) Thanks, Martin Honza
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [11/n] Optimization helpers
On 10 Oct 10:14, Jeff Law wrote: On 10/10/14 08:24, Ilya Enkovich wrote: On 09 Oct 12:09, Jeff Law wrote: On 10/08/14 13:16, Ilya Enkovich wrote: Hi, This patch introduces structures and manipulation functions used by simple checker optimizations. Structures are used to hold checks information - type of check and checked address in a polinomial form. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (check_type): New. (pol_item): New. (address_t): New. (check_info): New. (bb_checks): New. (chkp_pol_item_compare): New. (chkp_pol_find): New. (chkp_extend_const): New. (chkp_add_addr_item): New. (chkp_sub_addr_item): New. (chkp_add_addr_addr): New. (chkp_sub_addr_addr): New. (chkp_mult_addr): New. (chkp_is_constant_addr): New. (chkp_print_addr): New. (chkp_collect_addr_value): New. (chkp_collect_value): New. (chkp_fill_check_info): New. +/* Find plynomial item in ADDR with var equal to VAR s/plynomial/polynomial/ With nit fixed and functions moved into whatever new file gets created for the optimization work this will be OK. jeff Thanks for review! Here is a fixed version. Ilya -- 2014-10-10 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp-opt.c: New. * Makefile.in (OBJS): Add tree-chkp-opt.o. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d8c8488..cd45b29 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1389,6 +1389,7 @@ OBJS = \ tree-parloops.o \ tree-phinodes.o \ tree-chkp.o \ +tree-chkp-opt.o \ tree-predcom.o \ tree-pretty-print.o \ tree-profile.o \ diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c new file mode 100644 index 000..103c4bb --- /dev/null +++ b/gcc/tree-chkp-opt.c @@ -0,0 +1,463 @@ +/* Pointer Bounds Checker optimization pass. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Ilya Enkovich (ilya.enkov...@intel.com) + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ + +#include config.h +#include system.h +#include coretypes.h +#include tree-core.h +#include stor-layout.h +#include varasm.h +#include tree.h +#include target.h +#include tree-iterator.h +#include tree-cfg.h +#include langhooks.h +#include tree-pass.h +#include hashtab.h +#include diagnostic.h +#include ggc.h +#include output.h +#include internal-fn.h +#include is-a.h +#include predict.h +#include cfgloop.h +#include stringpool.h +#include tree-ssa-alias.h +#include tree-ssanames.h +#include tree-ssa-operands.h +#include tree-ssa-address.h +#include tree-ssa.h +#include ipa-inline.h +#include basic-block.h +#include tree-ssa-loop-niter.h +#include gimple-expr.h +#include gimple.h +#include tree-phinodes.h +#include gimple-ssa.h +#include ssa-iterators.h +#include gimple-pretty-print.h +#include gimple-iterator.h +#include gimplify.h +#include gimplify-me.h +#include print-tree.h +#include expr.h +#include tree-ssa-propagate.h +#include gimple-fold.h +#include gimple-walk.h +#include tree-dfa.h +#include tree-chkp.h Thanks. Looks good. As a follow-up, can you try to trim down what appear to be the over-zealous includes? It's a minor thing, but we are trying to be a bit saner about that kind of stuff than we've been in the past. If you've already done that, then, well, we've clearly still got a ways to go. 
For example, I can't see why you'd need output.h here :-0 Jeff Thanks for review! This includes list is from tree-chkp.c and surely is reducible. I also revisited tree-chkp.c and removed few includes from there. Here is a new version. Ilya -- 2014-10-10 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp-opt.c: New. * Makefile.in (OBJS): Add tree-chkp-opt.o. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d8c8488..cd45b29 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1389,6 +1389,7 @@ OBJS = \ tree-parloops.o \ tree-phinodes.o \ tree-chkp.o \ + tree-chkp-opt.o \ tree-predcom.o \ tree-pretty-print.o \ tree-profile.o \ diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c new file mode 100644 index 000..08be848 --- /dev/null +++ b/gcc/tree-chkp-opt.c @@ -0,0 +1,447 @@ +/* Pointer Bounds Checker optimization pass. +
[C++ patch] PR tree-optimization/62053 (VLA arrays)
Hi, this patch triggers assert in tree-inline that verifies that array type and its main variant have same size (as pointer). In this case we have two types that do have same size, but the expression is different. It is created in both cases by layout_type because C++ FE calls layout_type before linking the variants. I think it is safe to move the call and avoid the duplication. Bootstrapped/regtested x86_64-linux, OK? Honza PR tree-optimization/62053 * g++.dg/torture/pr62053.C: New testcase. * tree.c: Avoid ordering issue with layout_type. Index: testsuite/g++.dg/torture/pr62053.C === --- testsuite/g++.dg/torture/pr62053.C (revision 0) +++ testsuite/g++.dg/torture/pr62053.C (revision 0) @@ -0,0 +1,7 @@ +// { dg-do compile } +typedef int GType; +class YGTable +{ + YGTable () { GType a[m_fn1 ()]; } + int m_fn1 (); +}; Index: cp/tree.c === --- cp/tree.c (revision 216145) +++ cp/tree.c (working copy) @@ -863,12 +863,12 @@ build_cplus_array_type (tree elt_type, t { t = build_min_array_type (elt_type, index_type); set_array_type_canon (t, elt_type, index_type); - if (!dependent) - layout_type (t); TYPE_MAIN_VARIANT (t) = m; TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (m); TYPE_NEXT_VARIANT (m) = t; + if (!dependent) + layout_type (t); } }
[committed] Fix comment about RUNTESTFLAGS and parallelization
Hi, After Jakub's pre-approval https://gcc.gnu.org/ml/gcc/2014-10/msg00084.html I have committed the small attached patch as r216147. 2014-10-13 Christophe Lyon christophe.l...@linaro.org * Makefile.in: (check-%): Update comment, as RUNTESTFLAGS no longer impact parallelization. Christophe. Index: gcc/Makefile.in === --- gcc/Makefile.in (revision 216146) +++ gcc/Makefile.in (revision 216147) @@ -3632,10 +3632,9 @@ $(check_p_numbers))) # For parallelized check-% targets, this decides whether parallelization -# is desirable (if -jN is used and RUNTESTFLAGS doesn't contain anything -# but optional --target_board or --extra_opts arguments). If desirable, -# recursive make is run with check-parallel-$lang{,1,2,3,4,5} etc. goals, -# which can be executed in parallel, as they are run in separate directories. +# is desirable (if -jN is used). If desirable, recursive make is run with +# check-parallel-$lang{,1,2,3,4,5} etc. goals, which can be executed in +# parallel, as they are run in separate directories. # check-parallel-$lang{,1,2,3,4,5} etc. goals invoke runtest with # GCC_RUNTEST_PARALLELIZE_DIR var in the environment and runtest_file_p # dejaGNU procedure is overridden to additionally synchronize through
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [8/n] Remove useless builtin calls
On 10 Oct 10:11, Jeff Law wrote: On 10/10/14 08:52, Ilya Enkovich wrote: THanks, Jeff With this code we remove user builtins calls coming from source code. E.g.: p2 = (int *)__bnd_init_ptr_bounds (p1); *p2 = 0; which means p2 has value of p1 but has default bounds and following store is unchecked. These calls are important for instrumentation but useless after instrumentation. I don't think it is a part of checker optimizer because it doesn't optimize instrumentation code. Also this transformation is trivial enough for O0 and checker optimizer works starting from O2. Below is a version fixed according to Richard's comments. Thanks, Ilya -- 2014-10-10 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_remove_useless_builtins): New. (chkp_execute): Remove useless calls to Pointer Bounds Checker builtins. Tests instrumentation are still needed. With some basic tests and instrumentation this will be OK. I hate to be harping tests, but few developers are going to be familiar with the MPX and related infrastructure and those tests are critical to helping them know when they break something. Similarly if the plan is to iterate on improving things, then those basic functionality tests will ultimately save time as you can smoke test before running larger benchmarks. jeff Here is a version with tests added. Thanks, Ilya -- gcc/ 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_remove_useless_builtins): New. (chkp_execute): Remove useless calls to Pointer Bounds Checker builtins. gcc/testsuite 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * gcc.target/i386/chkp-builtins-1.c: New. * gcc.target/i386/chkp-builtins-2.c: New. * gcc.target/i386/chkp-builtins-3.c: New. * gcc.target/i386/chkp-builtins-4.c: New. 
diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c b/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c new file mode 100644 index 000..bcc1198 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */ +/* { dg-final { scan-tree-dump-not bnd_init_ptr_bounds chkp } } */ + +void * +chkp_test (void *p) +{ + return __builtin___bnd_init_ptr_bounds (p); +} diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c b/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c new file mode 100644 index 000..1f4a244 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */ +/* { dg-final { scan-tree-dump-not bnd_copy_ptr_bounds chkp } } */ + +void * +chkp_test (void *p, void *q) +{ + return __builtin___bnd_copy_ptr_bounds (p, q); +} diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c b/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c new file mode 100644 index 000..ea54ede --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */ +/* { dg-final { scan-tree-dump-not bnd_set_ptr_bounds chkp } } */ + +void * +chkp_test (void *p) +{ + return __builtin___bnd_set_ptr_bounds (p, 10); +} diff --git a/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c b/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c new file mode 100644 index 000..cee780b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-builtins-4.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp } */ +/* { dg-final { scan-tree-dump-not bnd_null_ptr_bounds chkp } } */ + +void * +chkp_test (void *p) +{ + return __builtin___bnd_null_ptr_bounds (p); +} diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index 9be153a..5957e45 
100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -3800,6 +3800,44 @@ chkp_instrument_function (void) } } +/* Find init/null/copy_ptr_bounds calls and replace them + with assignments. It should allow better code + optimization. */ + +static void +chkp_remove_useless_builtins () +{ + basic_block bb; + gimple_stmt_iterator gsi; + + FOR_EACH_BB_FN (bb, cfun) +{ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi)) +{ + gimple stmt = gsi_stmt (gsi); + tree fndecl; + enum built_in_function fcode; + + /* Find builtins returning first arg and replace +them with assignments. */ + if (gimple_code (stmt) == GIMPLE_CALL + (fndecl = gimple_call_fndecl (stmt)) + DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL + (fcode = DECL_FUNCTION_CODE (fndecl)) + (fcode == BUILT_IN_CHKP_INIT_PTR_BOUNDS + || fcode == BUILT_IN_CHKP_NULL_PTR_BOUNDS + || fcode == BUILT_IN_CHKP_COPY_PTR_BOUNDS + || fcode ==
[PATCH][ARM]Add ACLE 2.0 predefined macro __ARM_FEATURE_IDIV
Hi all, This is a simple patch to add missing __ARM_FEATURE_IDIV__ predefined macro (ACLE 2.0) into TARGET_CPU_CPP_BUILTINS. Is it Okay to commit? gcc/ChangeLog: 2014-10-13 Renlin Li renlin...@arm.com * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add ACLE 2.0 predefined macro __ARM_FEATURE_IDIV__. diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 3623c70..941d1b4 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -164,7 +164,10 @@ extern char arm_arch_name[]; builtin_define (__ARM_EABI__); \ } \ if (TARGET_IDIV)\ - builtin_define (__ARM_ARCH_EXT_IDIV__); \ + { \ +builtin_define (__ARM_ARCH_EXT_IDIV__); \ +builtin_define (__ARM_FEATURE_IDIV__); \ + } } while (0) #include config/arm/arm-opts.h
Re: [PATCH 1/2] libstdc++: Add std::align.
On 13/10/14 14:04 +0100, Jonathan Wakely wrote: On 13/10/14 13:10 +0100, Jonathan Wakely wrote: On 13/10/14 12:35 +0200, Rüdiger Sonderfeld wrote: On Monday 13 October 2014 11:03:51 Jonathan Wakely wrote: Fixed (the new function should be in namespace std!) and applied to trunk. Thanks for the patch. Thanks for merging and improving my patches! If you have the time maybe you could also look at the put_time patch. Unlike get_time it does not require an ABI change. I was about to close https://gcc.gnu.org/PR57350 as fixed, but the test in the SO question it links to fails with your implementation that I've just committed: https://stackoverflow.com/questions/16305311/usage-issue-of-stdalign I'm testing this fix. Committed to trunk. commit 60aa345d8122053f7c8ba7c743b458e799eb1455 Author: Jonathan Wakely jwak...@redhat.com Date: Mon Oct 13 14:02:39 2014 +0100 PR libstdc++/57350 * include/std/memory (align): Do not adjust correctly aligned address. * testsuite/20_util/align/2.cc: New. diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory index affc8b1..b5792ad 100644 --- a/libstdc++-v3/include/std/memory +++ b/libstdc++-v3/include/std/memory @@ -113,14 +113,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION inline void* align(size_t __align, size_t __size, void* __ptr, size_t __space) noexcept { - const size_t __diff = __align - reinterpret_castuintptr_t(__ptr) % __align; - if (__diff + __size = __space) + const auto __intptr = reinterpret_castuintptr_t(__ptr); + const auto __aligned = (__intptr - 1u + __align) -__align; + const auto __diff = __aligned - __intptr; + if ((__size + __diff) __space) return nullptr; else { __space -= __diff; - __ptr = static_castchar*(__ptr) + __diff; - return __ptr; + return __ptr = reinterpret_castvoid*(__aligned); } } diff --git a/libstdc++-v3/testsuite/20_util/align/2.cc b/libstdc++-v3/testsuite/20_util/align/2.cc new file mode 100644 index 000..efad56a --- /dev/null +++ b/libstdc++-v3/testsuite/20_util/align/2.cc @@ -0,0 
+1,42 @@ +// { dg-options -std=gnu++11 } + +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING3. If not see +// http://www.gnu.org/licenses/. + +// C++11 [ptr.align] (20.6.5): std::align + +#include memory +#include testsuite_hooks.h + +void +test01() +{ + bool test __attribute__((unused)) = true; + + int i = 0; + void* ptr = i; + auto space = sizeof(i); + auto p2 = std::align(alignof(i), space, ptr, space); + VERIFY( ptr == i ); + VERIFY( p2 == i ); + VERIFY(space == sizeof(i)); +} + +int main() +{ + test01(); +}
[PATCH][AARCH64]Remove unused variable and macro
Hi all, This is a simple patch to remove unused variables and macro. Is it Okay to commit? gcc/ChangeLog: 2014-10-13 Renlin Li renlin...@arm.com * config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete. Regards, Renlin Li From 88b6dc7fccf70c140070fbc5472d9bc1a8590754 Mon Sep 17 00:00:00 2001 From: Renlin Li renlin...@arm.com Date: Mon, 13 Oct 2014 09:30:57 +0100 Subject: [PATCH 2/6] remove unused ARM_DEFAULT_PCS macro and arm_pcs_variant --- gcc/config/aarch64/aarch64.h |4 1 file changed, 4 deletions(-) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 73b68ee..77f4511 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -578,11 +578,7 @@ enum arm_pcs }; -extern enum arm_pcs arm_pcs_variant; -#ifndef ARM_DEFAULT_PCS -#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 -#endif /* We can't use enum machine_mode inside a generator file because it hasn't been created yet; we shouldn't be using any code that -- 1.7.9.5
[jit] Avoid overwhelming expect's buffer
Committed to git branch dmalcolm/jit: expect has a default buffer size of 2000 bytes (match_max). If the testsuite programs send large amounts of data to stdout/stderr, they could overwhelm this buffer. Avoid this. gcc/testsuite/ChangeLog.jit: * jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine note from dejagnu.h to new function dejagnu_note so that we can make note be threadsafe. (set_options): Don't enable GCC_JIT_BOOL_OPTION_DUMP_SUMMARY, since it can generate large amounts of output that could overwhelm expect's buffer. * jit.dg/test-dot-product.c (verify_code): Use note rather than printf, to give DejaGnu more chances to parse this log data, rather than overflowing its buffer. * jit.dg/test-factorial.c (verify_code): Likewise. * jit.dg/test-fibonacci.c (verify_code): Likewise. * jit.dg/test-fuzzer.c (main): Likewise. * jit.dg/test-nested-loops.c (verify_code): Likewise. * jit.dg/test-sum-of-squares.c (verify_code): Likewise. * jit.dg/test-threads.c (note): New function, adding thread-safety on top of dejagnu_note, the latter being the implementation found in dejagnu.h. (run_threaded_test): Use note rather than printf. --- gcc/testsuite/ChangeLog.jit| 21 + gcc/testsuite/jit.dg/harness.h | 4 +++- gcc/testsuite/jit.dg/test-dot-product.c| 2 +- gcc/testsuite/jit.dg/test-factorial.c | 2 +- gcc/testsuite/jit.dg/test-fibonacci.c | 2 +- gcc/testsuite/jit.dg/test-fuzzer.c | 4 ++-- gcc/testsuite/jit.dg/test-nested-loops.c | 2 +- gcc/testsuite/jit.dg/test-sum-of-squares.c | 2 +- gcc/testsuite/jit.dg/test-threads.c| 26 +- 9 files changed, 52 insertions(+), 13 deletions(-) diff --git a/gcc/testsuite/ChangeLog.jit b/gcc/testsuite/ChangeLog.jit index 798f7c9..7703212 100644 --- a/gcc/testsuite/ChangeLog.jit +++ b/gcc/testsuite/ChangeLog.jit @@ -1,3 +1,24 @@ +2014-10-13 David Malcolm dmalc...@redhat.com + + * jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine + note from dejagnu.h to new function dejagnu_note so that we can + make note be threadsafe. 
+ (set_options): Don't enable GCC_JIT_BOOL_OPTION_DUMP_SUMMARY, + since it can generate large amounts of output that could overwhelm + expect's buffer. + * jit.dg/test-dot-product.c (verify_code): Use note rather than + printf, to give DejaGnu more chances to parse this log data, + rather than overflowing its buffer. + * jit.dg/test-factorial.c (verify_code): Likewise. + * jit.dg/test-fibonacci.c (verify_code): Likewise. + * jit.dg/test-fuzzer.c (main): Likewise. + * jit.dg/test-nested-loops.c (verify_code): Likewise. + * jit.dg/test-sum-of-squares.c (verify_code): Likewise. + * jit.dg/test-threads.c (note): New function, adding thread-safety + on top of dejagnu_note, the latter being the implementation + found in dejagnu.h. + (run_threaded_test): Use note rather than printf. + 2014-10-07 David Malcolm dmalc...@redhat.com * jit.dg/jit.exp (jit-dg-test): Prepend the installed bindir to diff --git a/gcc/testsuite/jit.dg/harness.h b/gcc/testsuite/jit.dg/harness.h index cee42f3..f326891 100644 --- a/gcc/testsuite/jit.dg/harness.h +++ b/gcc/testsuite/jit.dg/harness.h @@ -22,6 +22,7 @@ #ifdef MAKE_DEJAGNU_H_THREADSAFE #define pass dejagnu_pass #define fail dejagnu_fail +#define note dejagnu_note #endif #include dejagnu.h @@ -29,6 +30,7 @@ #ifdef MAKE_DEJAGNU_H_THREADSAFE #undef pass #undef fail +#undef note #endif static char test[1024]; @@ -165,7 +167,7 @@ static void set_options (gcc_jit_context *ctxt, const char *argv0) gcc_jit_context_set_bool_option ( ctxt, GCC_JIT_BOOL_OPTION_DUMP_SUMMARY, -1); +0); } #ifndef TEST_ESCHEWS_TEST_JIT diff --git a/gcc/testsuite/jit.dg/test-dot-product.c b/gcc/testsuite/jit.dg/test-dot-product.c index 2cde66d..a41109a 100644 --- a/gcc/testsuite/jit.dg/test-dot-product.c +++ b/gcc/testsuite/jit.dg/test-dot-product.c @@ -123,7 +123,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result) CHECK_NON_NULL (my_dot_product); double test_array[] = {1., 2., 3., 4., 5., 6., 7., 8., 9., 10.}; double val = my_dot_product (10, test_array, 
test_array); - printf(my_dot_product returned: %f\n, val); + note (my_dot_product returned: %f, val); CHECK_VALUE (val, 385.0); } diff --git a/gcc/testsuite/jit.dg/test-factorial.c b/gcc/testsuite/jit.dg/test-factorial.c index eecd831..b2aaece 100644 --- a/gcc/testsuite/jit.dg/test-factorial.c +++ b/gcc/testsuite/jit.dg/test-factorial.c @@ -97,7 +97,7 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result) (my_factorial_fn_type)gcc_jit_result_get_code (result, my_factorial); CHECK_NON_NULL (my_factorial); int val = my_factorial
PR 62127 (VLA arrays and remap_type)
Hi, this patch fixes a bug I introduced in 2014-06-28 Jan Hubicka hubi...@ucw.cz * tree-inline.c (remap_type_1): Do not duplicate fields that are shared in between type and its main variant. When adding the conditionals I somehow dropped the else path of the test copying TREE_TYPE of array. Bootstrapped/regtested x86_64-linux, committed as obvious. Honza PR tree-optimization/62127 * g++.dg/torture/pr62127.C: New testcase. * tree.c (remap_type_1): When remapping array, remap also its type. Index: testsuite/g++.dg/torture/pr62127.C === --- testsuite/g++.dg/torture/pr62127.C (revision 0) +++ testsuite/g++.dg/torture/pr62127.C (revision 0) @@ -0,0 +1,11 @@ +// { dg-do compile } +struct A +{ + A(int); +}; + +A::A(int i) +{ + int x[1][i]; + x[0][0] = 0; +} Index: tree-inline.c === --- tree-inline.c (revision 216145) +++ tree-inline.c (working copy) @@ -496,6 +496,8 @@ remap_type_1 (tree type, copy_body_data if (TYPE_MAIN_VARIANT (new_tree) != new_tree && TREE_TYPE (type) == TREE_TYPE (TYPE_MAIN_VARIANT (type))) TREE_TYPE (new_tree) = TREE_TYPE (TYPE_MAIN_VARIANT (new_tree)); + else + TREE_TYPE (new_tree) = remap_type (TREE_TYPE (new_tree), id); if (TYPE_MAIN_VARIANT (new_tree) != new_tree) {
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [12/n] Optimize string functions
On 10 Oct 10:25, Jeff Law wrote: On 10/10/14 08:19, Ilya Enkovich wrote: So is the purpose here to expose the checks that would normally be done in the mem* routines to their caller in the hopes that doing so will expose redundant checks? Or is there some other reason? There are few reasons to replace instrumented string functions: 1. As you said following redundant checks elimination may remove checks for some string functions 2. By default functions like memcpy should assume pointers are copied and copy bounds. If we know pointers are not copied, we may use faster version with no bounds copy 3. If we avoid both checks and bounds copy then it is a candidate for existing string function calls inlining in expand pass Perfect. So this belongs in a comment in the code. I thought tests will be added later. Did you already post them? There's been so many patches I'm starting to lose track :-) I didn't post tests yet. For future reference, when you break a submission down into logical hunks, including the tests in those logical hunks helps. I realize the MPX work isn't as well suited for that kind of breakdown, but it's worth keeping in mind. I have ~250 tests to commit. Will check I have tests for optimizations. Excellent. BTW this particular optimization cann't work until we have instrumented builtin calls. Yea, hopefully we'll get to that before close of stage1. It's a nit, but I'd tend to write that as: if (!fndecl_nochk) continue; fndecl = fndecl_nochk gimple_call_set_fndecl (stmt, fndecl); There is one more assignment to fndecl above which makes your version nonequivalent. I had assumed the gimple_call_set_fndecl was a nop if we didn't change the fndecl. Is that not the case? Right. But (!fndecl_nochk) doesn't mean we didn't change fndecl because there is another set to fndecl above. Ilya I'm a bit surprised we don't have this kind of capability already broken out. 
But assuming that's the case, can you go ahead and break that out into its own little helper function?You don't need to find all the cases where we're doing this kind of thing today, just create the helper function and use it in your new code. I could miss such function (looked in cfg hooks and tree-cfg.h). Hopefully someone will correct me if it is so. Thanks. I suspect everyone has just done their own implementation inline like you did. It's something I'll be keeping my eye out for in others' code so we can funnel everyone into your new function. ISTM many speculative optimizations are going to need that kind of helper. Taking into account not instrumented builtin calls I suppose this patch goes into a next builtin related series. But here is a version with changes. Yea, I think you're right. I think this is OK when the builtins are done. jeff
PING: Re: [patch] tag ../include/*
PING On 10/07/14 09:37, Aldy Hernandez wrote: Is there a reason we don't create etags for toplevel include files? If not, could I please apply this patch? Thanks. Aldy
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Fri, Oct 10, 2014 at 8:03 PM, Jeff Law l...@redhat.com wrote: On 10/10/14 01:42, Evgeny Stupachenko wrote: Hi, The patch enables EBX in RA for x86 32bits PIC mode. It was discussed here: https://gcc.gnu.org/ml/gcc-patches/2014-09/msg02513.html Now there is working version with good performance and stability level - it could be a solid first step of EBX enabling. Bootstrap and make check passed. There are several changes in -m32 make check. New pass: gcc.target/i386/pr57003.c - before patch there was not enough registers to PASS ?!? That doesn't make a lot of sense. More likely it was Uros's fix from yesterday to regcprop which causes this to pass again. Correct. I've marked it by mistake. The test is flaky and the patch does not change anything for the test. Is it possible you updated your sources between testing runs and as a result picked up Uros's fix? New fails: gcc.target/i386/pic-1.c (test for errors, line 12) - now there are no errors as we can do asm insertions with EBX I think you should remove the dg-error directive. That turns this test into a simple confirmation that we can use %ebx in an asm even when generating PIC code. Can you add a PR markers to your changelog PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 Actually I think there is an additional test in 47602. Can you please add it to the suite? You'll also want to change the state of 47602 to RESOLVED/FIXED. gcc.target/i386/pr23098.c scan-assembler-not .LC[0-9] - potential performance opportunity using constant immediate If you're not going to fix it, then you should xfail it. gcc.target/i386/pr55458.c (test for errors, line 10) - now there are no errors as there enough registers Right. Remove the dg-error and turn this into a test that effective verifies that %ebx is no longer fixed when generating PIC code on i686. With those changes this is OK for the trunk. 
jeff ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. enabling_ebx_tests.patch Description: Binary data
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [15/n] Optimize redundant checks
2014-10-10 20:56 GMT+04:00 Jeff Law l...@redhat.com: On 10/10/14 09:50, Ilya Enkovich wrote: Checks and intersection removal code was added as a simple pass catching trivial cases. I'm sure there are optimizations having common elements with what checker optimizer does. But initially we didn't want to adopt existing optimizers because GIMPLE representation of instrumentation was not stable and also we still don't know what are important targets for optimizations. Understood. The plan is to have stable version first. After enabling we want to make performance analysis and determine which optimizations are most required (it may appear checks removal doesn't give any significant performance gain at all), determine which of current infrastructure may be re-used (if any) and implement proper checker optimization. Current optimizer is a simple code cleanup. I do not think we should make any significant rework of it as a part of enabling. If current approach seems to require significant changes to go to trunk then it should be probably delayed and go separately from instrumentation pass. Well, I think it should be trivial to handle the redundant check elimination in DOM. Most likely eliminate_redundant_computations needs some work to allow it to look inside those checks and get them recorded into its tables. With that in place, DOM should optimize this stuff without further intervention. It's probably less code than you've already written :-) The swapping variant feels like it should be simple to implement with the existing dominator walkers. But I haven't thought nearly as much about that one. jeff I'll look into DOM and a possibility to use it for checks removal. But I give higher priority to builtins instrumentation and therefore prefer to delay this one and return to it after builtins instrumentation work or in case there is some spare time for it. This patch is not critical for checker functionality and may be excluded from initial commit. Thanks, Ilya
Re: [PATCH 2/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
-#define PIC_OFFSET_TABLE_REGNUM \ - ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ - || TARGET_PECOFF)) \ - || !flag_pic ? INVALID_REGNUM \ - : reload_completed ? REGNO (pic_offset_table_rtx) \ +#define PIC_OFFSET_TABLE_REGNUM \ + ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ + || TARGET_PECOFF)) \ + || !flag_pic ? INVALID_REGNUM \ + : pic_offset_table_rtx ? INVALID_REGNUM \ : REAL_PIC_OFFSET_TABLE_REGNUM) No negative conditions, please. Also, please follow established multi-level condition format, please see e.g. HARD_REGNO_NREGS definition in i386.h. I don't see how we can avoid negative condition here. If we remove not from !flag_pic we'll need to add not to TARGET_64BIT and TARGET_PECOFF. I've done it this way: +#define PIC_OFFSET_TABLE_REGNUM \ + ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ + || TARGET_PECOFF)) \ + || !flag_pic \ + ? INVALID_REGNUM\ + : pic_offset_table_rtx \ + ? INVALID_REGNUM \ + : REAL_PIC_OFFSET_TABLE_REGNUM) Is it ok? On Fri, Oct 10, 2014 at 6:01 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Updated ChangeLog: 2014-10-10 Ilya Enkovich ilya.enkov...@intel.com Vladimir Makarov vmaka...@redhat.com * config/i386/i386.c (ix86_use_pseudo_pic_reg): New. (ix86_init_pic_reg): New. (ix86_select_alt_pic_regnum): Add check on pseudo register. (ix86_save_reg): Likewise. (ix86_expand_prologue): Remove PIC register initialization now performed in ix86_init_pic_reg. (ix86_output_function_epilogue): Add check on pseudo register. (set_pic_reg_ever_alive): New. (legitimize_pic_address): Replace df_set_regs_ever_live with new set_pic_reg_ever_alive. (legitimize_tls_address): Likewise. (ix86_pic_register_p): New check. (ix86_delegitimize_address): Add check on pseudo register. (ix86_expand_call): Insert move from pseudo PIC register to ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. (TARGET_INIT_PIC_REG): New. (TARGET_USE_PSEUDO_PIC_REG): New. * config/i386/i386.h (PIC_OFFSET_TABLE_REGNUM): Return INVALID_REGNUM if pic_offset_table_rtx exists. 
- if (pic_offset_table_rtx) + if (pic_offset_table_rtx + (!reload_completed || !ix86_use_pseudo_pic_reg ())) Hm, can you please add a comment for this change? I've added the following comment to the patch: -in which case we return (%ecx - %ebx) + foo. */ +in which case we return (%ecx - %ebx) + foo. + +Note that when pseudo_pic_reg is used we can generate it only +before reload_completed. */ On Fri, Oct 10, 2014 at 4:36 PM, Jakub Jelinek ja...@redhat.com wrote: On Fri, Oct 10, 2014 at 02:34:07PM +0200, Rainer Orth wrote: Uros Bizjak ubiz...@gmail.com writes: On Fri, Oct 10, 2014 at 9:43 AM, Evgeny Stupachenko evstu...@gmail.com wrote: i386 specific part of the patch: 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com Vladimir Makarov vmaka...@redhat.com * gcc/config/i386/i386.c (ix86_use_pseudo_pic_reg): New. Evgeny: here and in your other submissions: drop the gcc prefix from the pathnames. They are all relative to the directory the ChangeLog lives in. And add a blank line after after the e-mail lines. Jakub
Re: [PATCH 6/n] OpenMP 4.0 offloading infrastructure: option handling
On 10/13/2014 12:33 PM, Ilya Verbin wrote: On 13 Oct 12:19, Jakub Jelinek wrote: But I'd like to understand why is this one needed. Why should the compilers care? Aggregates layout and alignment of integral/floating types must match between host and offload compilers, sure, but isn't that something streamed already in the LTO bytecode? Or is LTO streamer not streaming some types like long_type_node? It isn't, see the preload_common_nodes code. Also, the backend needs to choose the right Pmode (and in the case of ptx, emit a directive about address sizes). Bernd
Re: [PATCH 3/5] IPA ICF pass
On 10/11/2014 02:05 AM, Martin Liška wrote: On 09/26/2014 09:46 PM, Jan Hubicka wrote: Hi, this is on ipa-icf-gimple.c @@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void) { if (verify_edge_corresponds_to_fndecl (e, decl)) { - error (edge points to wrong declaration:); - debug_tree (e-callee-decl); - fprintf (stderr, Instead of:); - debug_tree (decl); - error_found = true; + /* The edge can be redirected in WPA by IPA ICF. +Following check really ensures that it's +not the case. */ + + cgraph_node *current_node = cgraph_node::get (decl); + if (!current_node || !current_node-icf_merged) I would move this into verify_edge_corresponds_to_fndecl. diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c new file mode 100644 index 000..7031eaa --- /dev/null +++ b/gcc/ipa-icf-gimple.c @@ -0,0 +1,384 @@ +/* Interprocedural Identical Code Folding pass + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska mli...@suse.cz + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ Please add toplevel comment about what the code does and how to use it. +namespace ipa_icf { + +/* Basic block equivalence comparison function that returns true if + basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond. */ ... to each other? I would add short comment that as comparsion goes you build voclabulary of equivalences of variables/ssanames etc. 
So people reading the code do not get lost at very beggining. + +bool +func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) +{ + unsigned i; + gimple_stmt_iterator gsi1, gsi2; + gimple s1, s2; + + if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count + || bb1-edge_count != bb2-edge_count) +return RETURN_FALSE (); The UPPERCASE looks ugly. I see that RETURN_FALSE is a warpper for return_false_with_msg that outputs line and file information. I would make it lowercase even if it is macro. You may consider using CXX_MEM_STAT_INFO style default argument to avoid function macro completely. Probably not big win given that it won't save you from preprocesor mess. + + gsi1 = gsi_start_bb (bb1-bb); + gsi2 = gsi_start_bb (bb2-bb); + + for (i = 0; i bb1-nondbg_stmt_count; i++) +{ + if (is_gimple_debug (gsi_stmt (gsi1))) + gsi_next_nondebug (gsi1); + + if (is_gimple_debug (gsi_stmt (gsi2))) + gsi_next_nondebug (gsi2); + + s1 = gsi_stmt (gsi1); + s2 = gsi_stmt (gsi2); + + if (gimple_code (s1) != gimple_code (s2)) + return RETURN_FALSE_WITH_MSG (gimple codes are different); I think you need to compare EH here. Consider case where one unit is compiled with -fno-exception and thus all EH regions are removed, while other function has EH regions in it. Those are not equivalent. EH region is obtained by lookup_stmt_eh and then you need to comapre them for match as you do with gimple_resx_regoin. + t1 = gimple_call_fndecl (s1); + t2 = gimple_call_fndecl (s2); + + /* Function pointer variables are not supported yet. */ They seems to be, compare_operand seems just right. + +/* Verifies for given GIMPLEs S1 and S2 that + label statements are semantically equivalent. 
*/ + +bool +func_checker::compare_gimple_label (gimple g1, gimple g2) +{ + if (m_ignore_labels) +return true; + + tree t1 = gimple_label_label (g1); + tree t2 = gimple_label_label (g2); + + return compare_tree_ssa_label (t1, t2); +} I would expect the main BB loop to record BB in which label belongs to and the BB assciatio neing checked here. Otherwise I do not see how switch statements are compared to not have different permutations of targets. Also note that one BB may have multiple labels in them and they are equivalent. Also I would punt on occurence of FORCED_LABEL. Those are tricky as they may be passed around and compared for address and no one really defines what should happen. Better to avoid those. Hi. I will remove this support in the pass. + +/* Verifies for given
[jit] Work around a bug in dejagnu.exp's host_execute
Committed to branch dmalcolm/jit: dejagnu.exp's host_execute has a bug, which (for me) reliably breaks the jit testsuite when running make check (though not when running make check-jit by itself, and appears to be unpleasantly timing-related). See the discussion at: http://lists.gnu.org/archive/html/dejagnu/2014-10/msg0.html For now, work around it by adding a patched copy of host_execute to jit.exp, deleting the bogus line. gcc/testsuite/ChangeLog.jit: * jit.dg/jit.exp (fixed_host_execute): New function, taken from host_execute in DejaGnu's dejagnu.exp, with one line removed. (jit-dg-test): Use fixed_host_execute, rathern than host_execute. --- gcc/testsuite/ChangeLog.jit | 6 ++ gcc/testsuite/jit.dg/jit.exp | 137 ++- 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/ChangeLog.jit b/gcc/testsuite/ChangeLog.jit index 7703212..97d0f8b 100644 --- a/gcc/testsuite/ChangeLog.jit +++ b/gcc/testsuite/ChangeLog.jit @@ -1,5 +1,11 @@ 2014-10-13 David Malcolm dmalc...@redhat.com + * jit.dg/jit.exp (fixed_host_execute): New function, taken from + host_execute in DejaGnu's dejagnu.exp, with one line removed. + (jit-dg-test): Use fixed_host_execute, rathern than host_execute. + +2014-10-13 David Malcolm dmalc...@redhat.com + * jit.dg/harness.h [MAKE_DEJAGNU_H_THREADSAFE] (note): Redefine note from dejagnu.h to new function dejagnu_note so that we can make note be threadsafe. diff --git a/gcc/testsuite/jit.dg/jit.exp b/gcc/testsuite/jit.dg/jit.exp index 4672063..76a1d9d 100644 --- a/gcc/testsuite/jit.dg/jit.exp +++ b/gcc/testsuite/jit.dg/jit.exp @@ -23,6 +23,135 @@ load_lib target-libpath.exp load_lib gcc.exp load_lib dejagnu.exp +# This is host_execute from dejagnu.exp commit +# 126a089777158a7891ff975473939f08c0e31a1c +# with the following patch applied, and renaming to fixed_host_execute. 
+# See the discussion at +# http://lists.gnu.org/archive/html/dejagnu/2014-10/msg0.html +# +# --- /usr/share/dejagnu/dejagnu.exp.old 2014-10-08 13:38:57.274068541 -0400 +# +++ /usr/share/dejagnu/dejagnu.exp 2014-10-10 12:27:51.113813659 -0400 +# @@ -113,8 +113,6 @@ proc host_execute {args} { +# set timetol 0 +# set arguments +# +# -expect_before buffer_full { perror Buffer full } +# - +# if { [llength $args] == 0} { +# set executable $args +# } else { + + +# Execute the executable file, and anaylyse the output for the +# test state keywords. +#Returns: +# A (empty) string if everything worked, or an error message +# if there was a problem. +# +proc fixed_host_execute {args} { +global text +global spawn_id + +set timeoutmsg Timed out: Never got started, +set timeout 100 +set file all +set timetol 0 +set arguments + +if { [llength $args] == 0} { + set executable $args +} else { + set executable [string trimleft [lindex [split $args ] 0] \{] + set params [string trimleft [lindex [split $args ] 1] \{] + set params [string trimright $params \}] +} + +verbose The executable is $executable 2 +if {![file exists ${executable}]} { + perror The executable, \$executable\ is missing 0 + return No source file found +} + +# spawn the executable and look for the DejaGnu output messages from the +# test case. 
+# spawn -noecho -open [open |./${executable} r] +spawn -noecho ./${executable} ${params} +expect_after full_buffer { error got full_buffer } + +set prefix \[^\r\n\]* +expect { + -re ^$prefix\[0-9\]\[0-9\]:..:..:${text}*\r\n { + regsub \[\n\r\t\]*NOTE: $text\r\n $expect_out(0,string) output + verbose $output 3 + set timetol 0 + exp_continue + } + -re ^$prefix\tNOTE:${text}* { + regsub \[\n\r\t\]*NOTE: $text\r\n $expect_out(0,string) output + set output [string range $output 6 end] + verbose $output 2 + set timetol 0 + exp_continue + } + -re ^$prefix\tPASSED:${text}* { + regsub \[\n\r\t\]*PASSED: $text\r\n $expect_out(0,string) output + set output [string range $output 8 end] + pass $output + set timetol 0 + exp_continue + } + -re ^$prefix\tFAILED:${text}* { + regsub \[\n\r\t\]*FAILED: $text\r\n $expect_out(0,string) output + set output [string range $output 8 end] + fail $output + set timetol 0 + exp_continue + } + -re ^$prefix\tUNTESTED:${text}* { + regsub \[\n\r\t\]*TESTED: $text\r\n $expect_out(0,string) output + set output [string range $output 8 end] + untested $output + set timetol 0 + exp_continue + } + -re ^$prefix\tUNRESOLVED:${text}* { + regsub \[\n\r\t\]*UNRESOLVED:
Re: [PATCH 3/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
Patch updated with the comment: diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2a64d2d..5fd6a82 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12455,9 +12455,18 @@ ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER))) cost++; + /* When address base or index is pic_offset_table_rtx we don't increase + address cost. When a memop with pic_offset_table_rtx is not invariant + itself it most likely means that base or index is not invariant. + Therefore only pic_offset_table_rtx could be hoisted out, which is not + profitable for x86. */ if (parts.base + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.base)) (!REG_P (parts.base) || REGNO (parts.base) = FIRST_PSEUDO_REGISTER) parts.index + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.index)) (!REG_P (parts.index) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER) parts.base != parts.index) cost++; On Fri, Oct 10, 2014 at 3:04 PM, Uros Bizjak ubiz...@gmail.com wrote: On Fri, Oct 10, 2014 at 9:58 AM, Evgeny Stupachenko evstu...@gmail.com wrote: the patch improves performance when previous are applied. It makes RTL loop invariant behavior for GOT loads same as it was before the 2 previous patches. The patch fixes x86 address cost so that cost for addresses with GOT register becomes less, how it was before enabling EBX. In x86_address_cost the result of “REGNO (parts.base) = FIRST_PSEUDO_REGISTER” for hard ebx was always false. The patch makes condition result the same when parts.base is GOT register (the same for parts.index). 2014-10-08 Evgeny Stupachenko evstu...@gmail.com * gcc/config/i386/i386.c (ix86_address_cost): Lower cost for when address contains GOT register. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b43e870..9d8cfd1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12497,8 +12497,12 @@ ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool) cost++; Please add a short comment here, explaining the reason for new condition. if (parts.base + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.base)) (!REG_P (parts.base) || REGNO (parts.base) = FIRST_PSEUDO_REGISTER) parts.index + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.index)) (!REG_P (parts.index) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER) parts.base != parts.index) cost++; Otherwise LGTM, but please repost the patch with a comment. Uros.
Re: [PATCH 2/3] libstdc++: Add put_time support.
On 13/10/14 13:08 +0100, Jonathan Wakely wrote: On 15/04/14 23:20 +0200, Rüdiger Sonderfeld wrote: Described in [ext.manip]. * libstdc++-v3/include/std/iomanip (_Put_time): New struct. (put_time): New manipulator. (operator<<): New overloaded function. * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/1.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/1.cc: * libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/wchar_t/2.cc: New file. The 27_io/manipulators/extended/put_time/char/2.cc and 27_io/manipulators/extended/put_time/wchar_t/2.cc tests fail for me. i2.exe: /home/jwakely/src/gcc/gcc/libstdc++-v3/testsuite/27_io/manipulators/extended/put_time/char/2.cc:41: void test01(): Assertion `oss.str() == "Son 1971"' failed. FAIL: 27_io/manipulators/extended/put_time/char/2.cc execution test With my de_DE.utf8 locale the output is "So 1971" not "Son 1971". $ LANG=de_DE.utf8 date +%a Mo
Re: [PATCH] move many gc hashtab to hash_table
On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com wrote: On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote: On 10/06/14 21:24, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This changes almost all of the ggc htab that don't use the if_marked option to be hash_tables. I added a for_user gty attribute so that types could be used from user marking routines without either using the mangled names or being referenced in a template argument which was previously the only way to get gengtype to emit user callable marking routines. bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc for ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and x86_64-apple-darwin. I haven't heard back yet on my request for a cfarm account so I haven't tried bootstrapping on other archs, but more testing is most welcome. Ok? Trev gcc/ * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h, config/darwin.c, config/m32c/m32c.c, config/mep/mep.c, config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c, function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c, output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c, tree-ssa.c, varasm.c: Use hash-table instead of hashtab. * doc/gty.texi (for_user): Document new option. * gengtype.c (create_user_defined_type): Don't try to get a struct for char. (walk_type): Don't error out on for_user option. (write_func_for_structure): Emit user marking routines if requested by for_user option. (write_local_func_for_structure): Likewise. (main): Mark types with for_user option as used. * ggc.h (gt_pch_nx): Add overload for unsigned int. * hash-map.h (hash_map::hash_entry::pch_nx_helper): AddOverloads. * hash-table.h (ggc_hasher): New struct. (hash_table::create_ggc): New function. (gt_pch_nx): New overload for hash_table. java/ * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use hash_table instead of hashtab. objc/ * objc-act.c: use hash_table instead of hashtab. 
cp/ * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c, pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of hashtab. fortran/ * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab. c-family/ * c-common.c: Use hash_table instead of hashtab. Whee, meaning we no longer have to pass through void * pointers and the visually ugly casting that requires in the callbacks?!? Yea! yeah, you can even use an iterator now if you like -++*((unsigned HOST_WIDE_INT *) data); +++* data; I think coding standards require no space here. Obviously a nit. oops, definitely should be fixed :) This is obviously a fairly mechanical change. I did some spot checking and it looks good. I don't expect any fallout. Well, its large enough I expect *something* but I doubt it'll be a big deal. Good for the trunk. thanks for the review, I know it was rather big. This breaks bootstrap on Linux/x86: https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html This patch fixes it. -- H.J. -- diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c index 6b8f706..40d32d8 100644 --- a/gcc/cp/mangle.c +++ b/gcc/cp/mangle.c @@ -3744,7 +3744,7 @@ mangle_conv_op_name_for_type (const tree type) char buffer[64]; /* Create a unique name corresponding to TYPE. 
*/ - sprintf (buffer, operator %lu, conv_type_names-elements ()); + sprintf (buffer, operator %lu, (long) conv_type_names-elements ()); identifier = get_identifier (buffer); *slot = identifier; diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 47b5d93..390e63e 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -22324,12 +22324,12 @@ void print_template_statistics (void) { fprintf (stderr, decl_specializations: size %ld, %ld elements, - %f collisions\n, decl_specializations-size (), - decl_specializations-elements (), + %f collisions\n, (long) decl_specializations-size (), + (long) decl_specializations-elements (), decl_specializations-collisions ()); fprintf (stderr, type_specializations: size %ld, %ld elements, - %f collisions\n, type_specializations-size (), - type_specializations-elements (), + %f collisions\n, (long) type_specializations-size (), + (long) type_specializations-elements (), type_specializations-collisions ()); }
Re: New rematerialization sub-pass in LRA
I don't see significant performance changes from the patch (with and without patch enabling ebx) on x86 in 32-bit mode. Thanks, Evgeny On Sat, Oct 11, 2014 at 2:31 AM, Sebastian Pop seb...@gmail.com wrote: Vladimir Makarov wrote: I've tested and benchmarked the sub-pass on x86-64 and ARM. The sub-pass permits generating smaller code on average on both architectures (although the improvement is not significant), adds 0.4% additional compilation time in -O2 mode of release GCC (according to user time of compilation of 500K lines Fortran program and valgrind lackey # insns in combine.i compilation) and about 0.7% in -O0 mode. As the performance result, the best I found is 1% SPECFP2000 improvement on ARM Exynos 5410 (973 vs 963) but for Intel Haswell the performance results are practically the same (Haswell has a very good sophisticated memory sub-system). On aarch64 I have seen some minor perf improvements to libpng compress and decompress. The patch does not change the perf for all other benchmarks that I have tested. Thanks, Sebastian
Re: [PATCH] move many gc hashtab to hash_table
On Mon, Oct 13, 2014 at 8:33 AM, H.J. Lu hjl.to...@gmail.com wrote: On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com wrote: On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote: On 10/06/14 21:24, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This changes almost all of the ggc htab that don't use the if_marked option to be hash_tables. I added a for_user gty attribute so that types could be used from user marking routines without either using the mangled names or being referenced in a template argument which was previously the only way to get gengtype to emit user callable marking routines. bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc for ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and x86_64-apple-darwin. I haven't heard back yet on my request for a cfarm account so I haven't tried bootstrapping on other archs, but more testing is most welcome. Ok? Trev gcc/ * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h, config/darwin.c, config/m32c/m32c.c, config/mep/mep.c, config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c, function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c, output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c, tree-ssa.c, varasm.c: Use hash-table instead of hashtab. * doc/gty.texi (for_user): Document new option. * gengtype.c (create_user_defined_type): Don't try to get a struct for char. (walk_type): Don't error out on for_user option. (write_func_for_structure): Emit user marking routines if requested by for_user option. (write_local_func_for_structure): Likewise. (main): Mark types with for_user option as used. * ggc.h (gt_pch_nx): Add overload for unsigned int. * hash-map.h (hash_map::hash_entry::pch_nx_helper): AddOverloads. * hash-table.h (ggc_hasher): New struct. (hash_table::create_ggc): New function. (gt_pch_nx): New overload for hash_table. 
java/ * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use hash_table instead of hashtab. objc/ * objc-act.c: use hash_table instead of hashtab. cp/ * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c, pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of hashtab. fortran/ * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab. c-family/ * c-common.c: Use hash_table instead of hashtab. Whee, meaning we no longer have to pass through void * pointers and the visually ugly casting that requires in the callbacks?!? Yea! yeah, you can even use an iterator now if you like -++*((unsigned HOST_WIDE_INT *) data); +++* data; I think coding standards require no space here. Obviously a nit. oops, definitely should be fixed :) This is obviously a fairly mechanical change. I did some spot checking and it looks good. I don't expect any fallout. Well, its large enough I expect *something* but I doubt it'll be a big deal. Good for the trunk. thanks for the review, I know it was rather big. This breaks bootstrap on Linux/x86: https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html This patch fixes it. This is what I checked in. -- H.J. --- Index: ChangeLog === --- ChangeLog (revision 216150) +++ ChangeLog (working copy) @@ -1,3 +1,9 @@ +2014-10-13 H.J. Lu hongjiu...@intel.com + + * mangle.c (mangle_conv_op_name_for_type): Cast elements to + unsigned long. + (print_template_statistics): Cast size and elements to long. + 2014-10-12 Trevor Saunders tsaund...@mozilla.com * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c, Index: mangle.c === --- mangle.c (revision 216150) +++ mangle.c (working copy) @@ -3744,7 +3744,8 @@ mangle_conv_op_name_for_type (const tree char buffer[64]; /* Create a unique name corresponding to TYPE. 
*/ - sprintf (buffer, operator %lu, conv_type_names-elements ()); + sprintf (buffer, operator %lu, + (unsigned long) conv_type_names-elements ()); identifier = get_identifier (buffer); *slot = identifier; Index: pt.c === --- pt.c (revision 216150) +++ pt.c (working copy) @@ -22324,12 +22324,12 @@ void print_template_statistics (void) { fprintf (stderr, decl_specializations: size %ld, %ld elements, - %f collisions\n, decl_specializations-size (), - decl_specializations-elements (), + %f collisions\n, (long) decl_specializations-size (), + (long) decl_specializations-elements (), decl_specializations-collisions ()); fprintf (stderr, type_specializations: size %ld, %ld elements, - %f collisions\n, type_specializations-size (), - type_specializations-elements
Re: [PATCH 0/2] xtensa: fix floating-point parts of machine description
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote: Hi Sterling, this series fixes two bugs in xtensa.md: Hi Max, thanks for this. I don't see a patch though.
Re: [PATCH 2/2] xtensa: use pre- and postincrement FP load/store when available
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote: 2014-10-10 Max Filippov jcmvb...@gmail.com gcc/ * config/xtensa/xtensa.h (TARGET_HARD_FLOAT_POSTINC): new macro. * config/xtensa/xtensa.md (*lsiu, *ssiu): add dependency on !TARGET_HARD_FLOAT_POSTINC. (*lsip, *ssip): new instructions. Approved. Do you have write privileges?
Re: [PATCH 1/2] xtensa: drop unimplemented floating point operations
On Sun, Oct 12, 2014 at 3:46 PM, Max Filippov jcmvb...@gmail.com wrote: xtensa ISA never implemented FP division, reciprocal, square root and inverse square root as single opcode. Remove patterns that can emit them. 2014-10-09 Max Filippov jcmvb...@gmail.com gcc/ * config/xtensa/xtensa.md (divsf3, *recipsf2, sqrtsf2, *rsqrtsf2): remove. Approved.
Re: [PATCH] Add zero-overhead looping for xtensa backend
On Fri, Oct 10, 2014 at 6:59 AM, Felix Yang fei.yang0...@gmail.com wrote: Hi Sterling, I made some improvements to the patch. Two changes: 1. TARGET_LOOPS is now used as a condition of the doloop related patterns, which is more elegant. Fine. 2. As the trip count register of the zero-cost loop may potentially be spilled, we need to change the patterns in order to handle this issue. Actually, for xtensa you don't. The trip count is copied into LCOUNT at the execution of the loop instruction, and therefore a spill or whatever doesn't matter--it won't affect the result. So as long as you have the trip count at the start of the loop, you are fine. This does bring up an issue of whether or not the trip count can be modified during the loop. (Note that this is different than early exit.) If it can, you can't use a zero-overhead loop. Does your patch address this case? The solution is similar to that adopted by the c6x backend. Just turn the zero-cost loop into a regular loop when that happens when reload is completed. Attached please find version 4 of the patch. Make check regression tested with xtensa-elf-gcc/simulator. OK for trunk?
Re: [PATCH 2/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Mon, Oct 13, 2014 at 5:01 PM, Evgeny Stupachenko evstu...@gmail.com wrote: -#define PIC_OFFSET_TABLE_REGNUM \ - ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ - || TARGET_PECOFF)) \ - || !flag_pic ? INVALID_REGNUM \ - : reload_completed ? REGNO (pic_offset_table_rtx) \ +#define PIC_OFFSET_TABLE_REGNUM \ + ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ + || TARGET_PECOFF)) \ + || !flag_pic ? INVALID_REGNUM \ + : pic_offset_table_rtx ? INVALID_REGNUM \ : REAL_PIC_OFFSET_TABLE_REGNUM) No negative conditions, please. Also, please follow established multi-level condition format, please see e.g. HARD_REGNO_NREGS definition in i386.h. I don't see how we can avoid negative condition here. If we remove not from !flag_pic we'll need to add not to TARGET_64BIT and TARGET_PECOFF. I've done it this way: +#define PIC_OFFSET_TABLE_REGNUM \ + ((TARGET_64BIT (ix86_cmodel == CM_SMALL_PIC \ + || TARGET_PECOFF)) \ + || !flag_pic \ + ? INVALID_REGNUM\ + : pic_offset_table_rtx \ + ? INVALID_REGNUM \ + : REAL_PIC_OFFSET_TABLE_REGNUM) Is it ok? Oh, indeed. I missed the logical or. Maybe put the first condition into parenthesis, to avoid confusion. OK in any case. Thanks, Uros.
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [15/n] Optimize redundant checks
On 10/13/14 08:58, Ilya Enkovich wrote: I'll look into DOM and a possibility to use it for checks removal. But I give higher priority to builtins instrumentation and therefore prefer to delay this one and return to it after builtins instrumentation work or in case there is some spare time for it. This patch is not critical for checker functionality and may be excluded from initial commit. OK. When you're ready to look at DOM, don't hesitate to contact me. I probably know that code better than anyone. jeff
Re: [PATCH 3/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Mon, Oct 13, 2014 at 5:17 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Patch updated with the comment: diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2a64d2d..5fd6a82 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12455,9 +12455,18 @@ ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER))) cost++; + /* When address base or index is pic_offset_table_rtx we don't increase + address cost. When a memop with pic_offset_table_rtx is not invariant + itself it most likely means that base or index is not invariant. + Therefore only pic_offset_table_rtx could be hoisted out, which is not + profitable for x86. */ if (parts.base + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.base)) (!REG_P (parts.base) || REGNO (parts.base) = FIRST_PSEUDO_REGISTER) parts.index + (!pic_offset_table_rtx + || REGNO (pic_offset_table_rtx) != REGNO(parts.index)) (!REG_P (parts.index) || REGNO (parts.index) = FIRST_PSEUDO_REGISTER) parts.base != parts.index) cost++; LGTM. OK. Thanks, Uros.
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com wrote: ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. Reversed patch was attached. Please repost. Uros.
RE: New rematerialization sub-pass in LRA
Here is a new rematerialization sub-pass of LRA. I've tested and benchmarked the sub-pass on x86-64 and ARM. The sub-pass permits generating smaller code on average on both architectures (although the improvement is not significant), adds 0.4% additional compilation time in -O2 mode of release GCC (according to user time of compilation of a 500K-line Fortran program and valgrind lackey # insns in combine.i compilation) and about 0.7% in -O0 mode. As for the performance result, the best I found is a 1% SPECFP2000 improvement on ARM Exynos 5410 (973 vs 963) but for Intel Haswell the performance results are practically the same (Haswell has a very good sophisticated memory sub-system). I ran SPEC2k on AArch64, and EON fails to run correctly with -fno-caller-saves -mcpu=cortex-a57 -fomit-frame-pointer -Ofast. I'm not sure whether this is AArch64 specific, but previously non-optimal register allocation choices triggered a latent bug in ree (it's unclear why GCC still allocates FP registers in high-pressure integer code, as I set the costs for int-FP moves high). On SPECINT2k performance is ~0.5% worse (5.5% regression on perlbmk), and SPECFP is ~0.2% faster. Generally I think it is good to have a specific pass for rematerialization. However should this not also affect the costs of instructions that can be cheaply rematerialized? Similarly for the choice whether to caller save or spill (today the caller-save code doesn't care at all about rematerialization, so it aggressively caller-saves values which could be rematerialized - see e.g. https://gcc.gnu.org/ml/gcc/2014-09/msg00071.html). Also I am confused by the claim memory reads are not profitable to rematerialize. Surely rematerializing a memory read from const-data or literal pool is cheaper than spilling as you avoid a store to the stack? Wilco
Re: PING: Re: [patch] tag ../include/*
On 10/13/14 08:53, Aldy Hernandez wrote: PING On 10/07/14 09:37, Aldy Hernandez wrote: Is there a reason we don't create etags for toplevel include files? If not, could I please apply this patch? ENOPATCH jeff
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On 10/13/14 08:53, Evgeny Stupachenko wrote: ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. Looks like you goof'd the patch slightly (reversed). It's trivial enough that I can see what the correctly ordered patch is doing. OK for the trunk at the same time the rest of the bits go in. jeff
Re: [PATCH] Add zero-overhead looping for xtensa backend
Thanks for the comments. The patch checked the usage of the trip count register, making sure that it is not used in the loop body other than by the doloop_end and does not live past the doloop_end instruction, as the following code snippet shows: + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) +{ + if (dump_file) +fprintf (dump_file, ";; loop %d uses iterator\n", + loop->loop_no); + return false; +} For the spill issue, I think we need to handle it. The reason is that currently we are not telling GCC about the existence of the LCOUNT register. Instead, we keep the trip count in a general register and it's possible that this register can be spilled when register pressure is high. It's a good idea to post another patch to describe the LCOUNT register in GCC in order to free this general register. But I want this patch applied as a first step, OK? Cheers, Felix On Tue, Oct 14, 2014 at 12:09 AM, augustine.sterl...@gmail.com augustine.sterl...@gmail.com wrote: On Fri, Oct 10, 2014 at 6:59 AM, Felix Yang fei.yang0...@gmail.com wrote: Hi Sterling, I made some improvements to the patch. Two changes: 1. TARGET_LOOPS is now used as a condition of the doloop related patterns, which is more elegant. Fine. 2. As the trip count register of the zero-cost loop may potentially be spilled, we need to change the patterns in order to handle this issue. Actually, for xtensa you don't. The trip count is copied into LCOUNT at the execution of the loop instruction, and therefore a spill or whatever doesn't matter--it won't affect the result. So as long as you have the trip count at the start of the loop, you are fine. This does bring up an issue of whether or not the trip count can be modified during the loop. (Note that this is different than early exit.) If it can, you can't use a zero-overhead loop. Does your patch address this case? The solution is similar to that adopted by the c6x backend. 
Just turn the zero-cost loop into a regular loop when that happens when reload is completed. Attached please find version 4 of the patch. Make check regression tested with xtensa-elf-gcc/simulator. OK for trunk?
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [12/n] Optimize string functions
On 10/13/14 08:51, Ilya Enkovich wrote: I had assumed the gimple_call_set_fndecl was a nop if we didn't change the fndecl. Is that not the case? Right. But (!fndecl_nochk) doesn't mean we didn't change fndecl because there is another set to fndecl above. Thanks for clarifying. This is OK when the builtins are done. jeff
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
Reattached. On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote: On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com wrote: ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. Reversed patch was attached. Please repost. Uros. enabling_ebx_tests.patch Description: Binary data
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Mon, Oct 13, 2014 at 6:32 PM, Evgeny Stupachenko evstu...@gmail.com wrote: Reattached. On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote: On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com wrote: ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. Reversed patch was attached. Please repost. OK. Thanks, Uros.
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [8/n] Remove useless builtin calls
On 10/13/14 08:07, Ilya Enkovich wrote: Tests & instrumentation are still needed. With some basic tests and instrumentation this will be OK. I hate to be harping on tests, but few developers are going to be familiar with the MPX and related infrastructure and those tests are critical to helping them know when they break something. Similarly if the plan is to iterate on improving things, then those basic functionality tests will ultimately save time as you can smoke test before running larger benchmarks. jeff Here is a version with tests added. Thanks, Ilya -- gcc/ 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_remove_useless_builtins): New. (chkp_execute): Remove useless calls to Pointer Bounds Checker builtins. gcc/testsuite 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * gcc.target/i386/chkp-builtins-1.c: New. * gcc.target/i386/chkp-builtins-2.c: New. * gcc.target/i386/chkp-builtins-3.c: New. * gcc.target/i386/chkp-builtins-4.c: New. OK. Jeff
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [9/n] Static constructors
On 10/13/14 06:41, Ilya Enkovich wrote: 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (MAX_STMTS_IN_STATIC_CHKP_CTOR): New. (chkp_ctor_stmt_list): New. (chkp_register_var_initializer): New. (chkp_add_modification_to_stmt_list): New. (chkp_output_static_bounds): New. (chkp_finish_file): New. (chkp_instrument_function): Remove useless statements from static bounds constructors. * tree-chkp.h (chkp_register_var_initializer): New. (chkp_finish_file): New. * doc/invoke.texi (chkp-max-ctor-size): New. * params.def (PARAM_CHKP_MAX_CTOR_SIZE): New. OK. jeff
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [10/n] Stores handler
On 10/13/14 05:23, Ilya Enkovich wrote: 2014-10-09 22:51 GMT+04:00 Jeff Law l...@redhat.com: On 10/08/14 13:12, Ilya Enkovich wrote: Hi, This patch adds an assignment processing function which is used by the inliner for newly generated stores. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_copy_bounds_for_assign): New. * tree-chkp.h (chkp_copy_bounds_for_assign): New. This probably should have been part of the inliner submission since that's the only place it's used and one needs the inliner context to know how this function is going to be used. Presumably the reason it's not in tree-inline and static is you want to utilize chkp_walk_pointer_assignments? The code is fine, just want to make sure it goes into a logical place. Jeff I have to export either chkp_copy_bounds_for_assign or chkp_walk_pointer_assignments with chkp_copy_bounds_for_elem. Not much difference but I'd prefer to keep all memref processing code in tree-chkp.c. OK. jeff
[jit] Fix a comment in configure.ac
Committed to branch dmalcolm/jit: The relevant code in jit/internal-api.c was moved to jit/jit-playback.c in 3071567787aef4a8ada8b38c890d01c19b4b998f. gcc/ChangeLog.jit: * configure.ac: Update a reference to jit/internal-api.c to jit/jit-playback.c. * configure: Regenerate. --- gcc/ChangeLog.jit | 6 ++ gcc/configure | 2 +- gcc/configure.ac | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog.jit b/gcc/ChangeLog.jit index c887473..340c9de 100644 --- a/gcc/ChangeLog.jit +++ b/gcc/ChangeLog.jit @@ -1,3 +1,9 @@ +2014-10-13 David Malcolm dmalc...@redhat.com + + * configure.ac: Update a reference to jit/internal-api.c to + jit/jit-playback.c. + * configure: Regenerate. + 2014-10-07 David Malcolm dmalc...@redhat.com * Makefile.in (site.exp): When constructing site.exp, add a line diff --git a/gcc/configure b/gcc/configure index 8ec141f..81634f2 100755 --- a/gcc/configure +++ b/gcc/configure @@ -28191,7 +28191,7 @@ _ACEOF fi # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit -# of jit/internal-api.c. +# of jit/jit-playback.c. cat gcc-driver-name.h EOF #define GCC_DRIVER_NAME ${target_noncanonical}-gcc-${gcc_BASEVER}${exeext} EOF diff --git a/gcc/configure.ac b/gcc/configure.ac index 62e5ad7..0af7a77 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -5680,7 +5680,7 @@ if test x${LINKER_HASH_STYLE} != x; then fi # Generate gcc-driver-name.h containing GCC_DRIVER_NAME for the benefit -# of jit/internal-api.c. +# of jit/jit-playback.c. cat gcc-driver-name.h EOF #define GCC_DRIVER_NAME ${target_noncanonical}-gcc-${gcc_BASEVER}${exeext} EOF -- 1.7.11.7
[committed] HP-UX 11 reentrant functions
Most systems don't guard the C reentrant functions but on HP-UX 11 they are not declared unless _REENTRANT is defined. This causes problems for configure, etc. The attached change adds the _REENTRANT define when _HPUX_SOURCE is defined. A similar change was applied to HP-UX 10 three years ago. This resolves PR libfortran/63471. Tested on hppa2.0w-hp-hpux11.11. Committed to trunk. Dave -- John David Anglin dave.ang...@bell.net 2014-01-13 John David Anglin dave.ang...@nrc-cnrc.gc.ca PR libfortran/63471 * config/pa/pa-hpux11.h (TARGET_OS_CPP_BUILTINS): Define _REENTRANT when _HPUX_SOURCE is defined. Index: config/pa/pa-hpux11.h === --- config/pa/pa-hpux11.h (revision 216000) +++ config/pa/pa-hpux11.h (working copy) @@ -40,6 +40,7 @@ if (c_dialect_cxx ()) \ { \ builtin_define (_HPUX_SOURCE);\ + builtin_define (_REENTRANT); \ builtin_define (_INCLUDE_LONGLONG); \ builtin_define (__STDCPP__); \ } \ @@ -48,6 +49,7 @@ if (!flag_iso) \ { \ builtin_define (_HPUX_SOURCE);\ + builtin_define (_REENTRANT); \ if (preprocessing_trad_p ())\ { \ builtin_define (hp9000s800); \
Re: PING: Re: [patch] tag ../include/*
On 10/13/14 09:27, Jeff Law wrote: On 10/13/14 08:53, Aldy Hernandez wrote: PING On 10/07/14 09:37, Aldy Hernandez wrote: Is there a reason we don't create etags for toplevel include files? If not, could I please apply this patch? ENOPATCH jeff Sorry, patch was in original message. Attached here. commit a679529d14f005d8c88517f72d2b5295d8c82f0f Author: Aldy Hernandez al...@redhat.com Date: Tue Oct 7 09:32:21 2014 -0700 * Makefile.in (TAGS): Tag ../include files. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 97b439a..df43b9c 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3772,6 +3772,7 @@ TAGS: lang.tags fi; \ done; \ etags -o TAGS.sub c-family/*.h c-family/*.c *.h *.c *.cc \ + ../include/*.h \ --language=none --regex=/\(char\|unsigned int\|int\|bool\|void\|HOST_WIDE_INT\|enum [A-Za-z_0-9]+\) [*]?\([A-Za-z_0-9]+\)/\2/ common.opt\ --language=none --regex=/\(DEF_RTL_EXPR\|DEFTREECODE\|DEFGSCODE\).*(\([A-Za-z_0-9]+\)/\2/ rtl.def tree.def gimple.def \ --language=none --regex=/DEFTIMEVAR (\([A-Za-z_0-9]+\)/\1/ timevar.def \
Re: [RFC: Patch, PR 60102] [4.9/4.10 Regression] powerpc fp-bit ices@dwf_regno
On Mon, Oct 13, 2014 at 9:20 AM, Ulrich Weigand uweig...@de.ibm.com wrote: Maciej W. Rozycki wrote: On Thu, 9 Oct 2014, Maciej W. Rozycki wrote: Seeing Rohit got good results it has struck me that perhaps one of the patches I had previously reverted, to be able to compile GCC in the first place, interfered with this fix -- I backed out all the subsequent patches to test yours and Rohit's by themselves only. And it was actually the case, with this change: 2013-05-21 Christian Bruel christian.br...@st.com * dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for spanning registers. LEAF_REG_REMAP is supported only for contiguous registers. Set register size out of the PARALLEL loop. back in place, in addition to your fix, I get an all-passed score for gdb.base/store.exp. So your change looks good and my decision to back out the other patches unfortunate. I'll yet run full e500v2 testing now to double check, and let you know what the results are, within a couple of hours if things work well. It took a bit more because I saw some regressions that I wanted to investigate. In the end they turned out intermittent and the failures happen sometimes whether your change is applied or not. So I'm fine with your change, thanks for your work and patience. Thanks for verifying! David, is the patch OK to commit now? Okay with me. Thanks! David
[PATCH 0/5] Merger of jit branch (v2)
I'd like to merge the JIT branch into trunk: https://gcc.gnu.org/wiki/JIT This is v2 since it incorporates fixes for the various issues identified by Joseph in an earlier submission: https://gcc.gnu.org/ml/gcc-patches/2014-09/msg02056.html I've split up the current diff between trunk and the branch into 5 areas for ease of review (and to allow for early merger of the supporting work, if it's deemed ready): patch 1: exposes an entrypoint in libiberty that I need patch 2: configure and Makefile changes in gcc patch 3: timevar.h: Add an auto_timevar class patch 4: State cleanups in gcc patch 5: Add the jit code itself [this is a diff of trunk r215958 aka e012cdc775868e9922f5fef9068a764546876d93 which is from 2014-10-06, vs jit branch version 75b3ee7acdc6de55354d65bb7d619386463e50a1]. I've successfully bootstrapped and regression-tested the cumulative result of all of the patches against a control build, building them both with --enable-host-shared, and with --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto adding ,jit to the test build (both on x86_64-unknown-linux-gnu; Fedora 20). There were no regressions vs the control build, and the patched build gains a jit.sum, with 4663 passes (and no failures). OK for trunk? 
Overall diffstat follows: ChangeLog.jit | 23 + MAINTAINERS|1 + contrib/ChangeLog.jit | 14 + contrib/jit-coverage-report.py | 67 + gcc/ChangeLog.jit | 323 + gcc/Makefile.in|9 + gcc/cgraph.c | 14 + gcc/cgraph.h |6 + gcc/cgraphunit.c | 20 +- gcc/configure | 54 +- gcc/configure.ac | 11 + gcc/diagnostic.c | 11 +- gcc/doc/install.texi |2 +- gcc/dwarf2out.c| 87 + gcc/dwarf2out.h|2 + gcc/gcse.c |9 + gcc/gcse.h |2 + gcc/ggc-page.c |5 + gcc/input.c|2 +- gcc/ipa-cp.c | 12 + gcc/ipa-pure-const.c | 113 +- gcc/ipa-reference.c| 17 +- gcc/ipa-reference.h|1 + gcc/java/ChangeLog.jit | 14 + gcc/jit/ChangeLog |9 + gcc/jit/ChangeLog.jit | 3252 ++ gcc/jit/Make-lang.in | 297 + gcc/jit/TODO.rst | 119 + gcc/jit/config-lang.in | 42 + gcc/jit/docs/Makefile | 153 + gcc/jit/docs/_build/texinfo/Makefile | 50 + gcc/jit/docs/_build/texinfo/factorial.png | Bin 0 - 183838 bytes gcc/jit/docs/_build/texinfo/libgccjit.texi | 6724 gcc/jit/docs/_build/texinfo/sum-of-squares.png | Bin 0 - 22839 bytes gcc/jit/docs/conf.py | 258 + gcc/jit/docs/examples/install-hello-world.c| 123 + gcc/jit/docs/examples/tut01-square.c | 107 + gcc/jit/docs/examples/tut02-sum-of-squares.c | 172 + gcc/jit/docs/examples/tut03-toyvm/Makefile | 11 + gcc/jit/docs/examples/tut03-toyvm/factorial.toy| 50 + gcc/jit/docs/examples/tut03-toyvm/fibonacci.toy| 66 + gcc/jit/docs/examples/tut03-toyvm/toyvm.c | 861 +++ gcc/jit/docs/index.rst | 50 + gcc/jit/docs/internals/index.rst | 157 + gcc/jit/docs/intro/factorial.png | Bin 0 - 183838 bytes gcc/jit/docs/intro/index.rst | 27 + gcc/jit/docs/intro/install.rst | 227 + gcc/jit/docs/intro/sum-of-squares.png | Bin 0 - 22839 bytes gcc/jit/docs/intro/tutorial01.rst | 349 + gcc/jit/docs/intro/tutorial02.rst | 378 ++ gcc/jit/docs/intro/tutorial03.rst | 1108 gcc/jit/docs/topics/contexts.rst | 315 + gcc/jit/docs/topics/expressions.rst| 524 ++ gcc/jit/docs/topics/functions.rst | 311 + gcc/jit/docs/topics/index.rst | 30 + gcc/jit/docs/topics/locations.rst | 69 + gcc/jit/docs/topics/objects.rst| 86 + 
gcc/jit/docs/topics/results.rst| 48 + gcc/jit/docs/topics/types.rst | 217 +
[PATCH 2/5] gcc: configure and Makefile changes needed by jit
gcc/ChangeLog: * configure.ac (gcc_version): Expose this value for use via AC_SUBST, since the jit code needs it within the new file libgccjit.pc.in. (doc_build_sys): New variable, set to sphinx if sphinx is installed, falling back to texinfo otherwise. (gcc-driver-name.h): Generate a gcc-driver-name.h file containing GCC_DRIVER_NAME for the benefit of jit/jit-playback.c. * configure: Regenerate. * Makefile.in (doc_build_sys): New. (bindir): New. (pkgconfigdir): New. (installdirs): Add creation of $(DESTDIR)$(pkgconfigdir). (site.exp): When constructing site.exp, add a line to set bindir. --- gcc/Makefile.in | 9 + gcc/configure| 54 -- gcc/configure.ac | 11 +++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 1dba76f..f5e3d4c 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -314,6 +314,11 @@ write_entries_to_file = $(shell rm -f $(2) || :) $(shell touch $(2)) \ $(shell expr $(range) + $(write_entries_to_file_split) - 1), $(1)) \ | tr ' ' '\012' $(2))) +# The jit documentation looks better if built with sphinx, but can be +# built with texinfo if sphinx is not available. +# configure sets doc_build_sys to sphinx or texinfo accordingly +doc_build_sys=@doc_build_sys@ + # # UNSORTED # @@ -565,6 +570,8 @@ bindir = @bindir@ libdir = @libdir@ # Directory in which GCC puts its executables. 
libexecdir = @libexecdir@ +# Directory in which to install .pc files for pkgconfig +pkgconfigdir = @libdir@/pkgconfig # # UNSORTED @@ -3125,6 +3132,7 @@ installdirs: $(mkinstalldirs) $(DESTDIR)$(infodir) $(mkinstalldirs) $(DESTDIR)$(man1dir) $(mkinstalldirs) $(DESTDIR)$(man7dir) + $(mkinstalldirs) $(DESTDIR)$(pkgconfigdir) PLUGIN_HEADERS = $(TREE_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ toplev.h $(DIAGNOSTIC_CORE_H) $(BASIC_BLOCK_H) $(HASH_TABLE_H) \ @@ -3496,6 +3504,7 @@ site.exp: ./config.status Makefile @echo # add them to the last section ./site.tmp @echo set rootme \`${PWD_COMMAND}`\ ./site.tmp @echo set srcdir \`cd ${srcdir}; ${PWD_COMMAND}`\ ./site.tmp + @echo set bindir \`cd ${bindir}; ${PWD_COMMAND}`\ ./site.tmp @echo set host_triplet $(host) ./site.tmp @echo set build_triplet $(build) ./site.tmp @echo set target_triplet $(target) ./site.tmp diff --git a/gcc/configure b/gcc/configure index 380a235..81634f2 100755 --- a/gcc/configure +++ b/gcc/configure @@ -743,6 +743,7 @@ CXXDEPMODE DEPDIR am__leading_dot CXXCPP +doc_build_sys AR NM BISON @@ -824,6 +825,7 @@ build_os build_vendor build_cpu build +gcc_version target_alias host_alias build_alias @@ -3040,6 +3042,7 @@ ac_config_headers=$ac_config_headers auto-host.h:config.in gcc_version=`cat $srcdir/BASE-VER` + # Determine the host, build, and target systems ac_aux_dir= for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do @@ -8060,6 +8063,47 @@ fi fi +# The jit documentation looks better if built with sphinx, but can be +# built with texinfo if sphinx is not available. +# Set doc_build_sys to sphinx or texinfo accordingly. +# Extract the first word of sphinx-build, so it can be a program name with args. +set dummy sphinx-build; ac_word=$2 +{ $as_echo $as_me:${as_lineno-$LINENO}: checking for $ac_word 5 +$as_echo_n checking for $ac_word... 
6; } +if test ${ac_cv_prog_doc_build_sys+set} = set; then : + $as_echo_n (cached) 6 +else + if test -n $doc_build_sys; then + ac_cv_prog_doc_build_sys=$doc_build_sys # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z $as_dir as_dir=. +for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f $as_dir/$ac_word$ac_exec_ext $as_test_x $as_dir/$ac_word$ac_exec_ext; }; then +ac_cv_prog_doc_build_sys=sphinx +$as_echo $as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext 5 +break 2 + fi +done + done +IFS=$as_save_IFS + + test -z $ac_cv_prog_doc_build_sys ac_cv_prog_doc_build_sys=texinfo +fi +fi +doc_build_sys=$ac_cv_prog_doc_build_sys +if test -n $doc_build_sys; then + { $as_echo $as_me:${as_lineno-$LINENO}: result: $doc_build_sys 5 +$as_echo $doc_build_sys 6; } +else + { $as_echo $as_me:${as_lineno-$LINENO}: result: no 5 +$as_echo no 6; } +fi + + # # Checks for C headers @@ -18049,7 +18093,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat conftest.$ac_ext _LT_EOF -#line 18052 configure +#line 18096 configure #include confdefs.h #if HAVE_DLFCN_H @@ -18155,7 +18199,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat conftest.$ac_ext _LT_EOF
[PATCH 1/5] libiberty: Expose choose_tmpdir, and fix constness of return type
The jit needs to create a temporary directory, so I need to expose libiberty's choose_tmpdir as a public entrypoint. include/ChangeLog: * libiberty.h (choose_tmpdir): New prototype. libiberty/ChangeLog: * choose-temp.c (choose_tmpdir): Remove now-redundant local copy of prototype. * functions.texi: Regenerate. * make-temp-file.c (choose_tmpdir): Convert return type from char * to const char * - given that this returns a pointer to a memoized allocation, the caller must not touch it. --- include/libiberty.h| 5 + libiberty/choose-temp.c| 1 - libiberty/functions.texi | 13 ++--- libiberty/make-temp-file.c | 4 ++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/libiberty.h b/include/libiberty.h index bcc1f9a..d09c9a5 100644 --- a/include/libiberty.h +++ b/include/libiberty.h @@ -227,6 +227,11 @@ extern char *make_relative_prefix (const char *, const char *, extern char *make_relative_prefix_ignore_links (const char *, const char *, const char *) ATTRIBUTE_MALLOC; +/* Returns a pointer to a directory path suitable for creating temporary + files in. */ + +extern const char *choose_tmpdir (void) ATTRIBUTE_RETURNS_NONNULL; + /* Choose a temporary directory to use for scratch files. */ extern char *choose_temp_base (void) ATTRIBUTE_MALLOC ATTRIBUTE_RETURNS_NONNULL; diff --git a/libiberty/choose-temp.c b/libiberty/choose-temp.c index 0a454cf..8e1e84b 100644 --- a/libiberty/choose-temp.c +++ b/libiberty/choose-temp.c @@ -34,7 +34,6 @@ Boston, MA 02110-1301, USA. */ #endif #include libiberty.h -extern char *choose_tmpdir (void); /* Name of temporary file. mktemp requires 6 trailing X's. 
*/ diff --git a/libiberty/functions.texi b/libiberty/functions.texi index 9323ff9..387aee0 100644 --- a/libiberty/functions.texi +++ b/libiberty/functions.texi @@ -125,7 +125,7 @@ Uses @code{malloc} to allocate storage for @var{nelem} objects of @end deftypefn -@c choose-temp.c:46 +@c choose-temp.c:45 @deftypefn Extension char* choose_temp_base (void) Return a prefix for temporary file names or @code{NULL} if unable to @@ -139,7 +139,7 @@ not recommended. @end deftypefn @c make-temp-file.c:96 -@deftypefn Replacement char* choose_tmpdir () +@deftypefn Replacement const char* choose_tmpdir () Returns a pointer to a directory path suitable for creating temporary files in. @@ -160,9 +160,8 @@ number of seconds used. @dots{}, @code{NULL}) Concatenate zero or more of strings and return the result in freshly -@code{xmalloc}ed memory. Returns @code{NULL} if insufficient memory is -available. The argument list is terminated by the first @code{NULL} -pointer encountered. Pointers to empty strings are ignored. +@code{xmalloc}ed memory. The argument list is terminated by the first +@code{NULL} pointer encountered. Pointers to empty strings are ignored. @end deftypefn @@ -528,7 +527,7 @@ nineteen EBCDIC varying characters is tested; exercise caution.) @end ftable @end defvr -@c hashtab.c:336 +@c hashtab.c:328 @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @ htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @ htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @ @@ -1163,7 +1162,7 @@ control over the state of the random number generator. 
@end deftypefn -@c concat.c:174 +@c concat.c:160 @deftypefn Extension char* reconcat (char *@var{optr}, const char *@var{s1}, @ @dots{}, @code{NULL}) diff --git a/libiberty/make-temp-file.c b/libiberty/make-temp-file.c index 7b74f81..244cc23 100644 --- a/libiberty/make-temp-file.c +++ b/libiberty/make-temp-file.c @@ -93,7 +93,7 @@ static char *memoized_tmpdir; /* -@deftypefn Replacement char* choose_tmpdir () +@deftypefn Replacement const char* choose_tmpdir () Returns a pointer to a directory path suitable for creating temporary files in. @@ -102,7 +102,7 @@ files in. */ -char * +const char * choose_tmpdir (void) { if (!memoized_tmpdir) -- 1.8.5.3
[PATCH 4/5] State cleanups
This patch contains various cleanups needed by the jit, so that we can rerun the compiler code repeatedly within the same process: * Introduce per-file finalization routines to clean up file-static variables. * Make various initializations idempotent (init_ggc), or clean up their previous state (init_stringpool). * Introduce a class toplev, so that we can easily control whether timevars are reset each time, or run in a cumulative mode. gcc/ChangeLog: * cgraph.c (cgraph_c_finalize): New function. * cgraph.h (cgraph_c_finalize): New prototype. (cgraphunit_c_finalize): New prototype. (ipa_cp_c_finalize): New prototype. * cgraphunit.c (first_analyzed): Move from analyze_functions to file-scope. (first_analyzed_var): Likewise. (analyze_functions): Move static variables into file-scope. (cgraphunit_c_finalize): New function. * diagnostic.c (diagnostic_finish): Free the memory for context->classify_diagnostic and context->printer, running the destructor for the latter. (bt_stop): Use toplev::main. * dwarf2out.c (dwarf2out_finalize): New function. * dwarf2out.h (dwarf2out_c_finalize): New prototype. * gcse.c (gcse_c_finalize): New function. * gcse.h (gcse_c_finalize): New prototype. * ggc-page.c (init_ggc): Make idempotent. * input.c (input_location): Initialize to UNKNOWN_LOCATION. * ipa-cp.c (ipa_cp_c_finalize): New function. * ipa-pure-const.c (function_insertion_hook_holder): Move to be a field of class pass_ipa_pure_const. (node_duplication_hook_holder): Likewise. (node_removal_hook_holder): Likewise. (register_hooks): Convert to method... (pass_ipa_pure_const::register_hooks): ...here, converting static variable init_p into... (pass_ipa_pure_const::init_p): ...new field. (pure_const_generate_summary): Update invocation of register_hooks to invoke as a method of current_pass. (pure_const_read_summary): Likewise. (propagate): Convert to... (pass_ipa_pure_const::execute): ...method. * ipa-reference.c (ipa_init): Move static bool init_p from here to... 
(ipa_init_p): New file-scope variable, so that it can be reset when repeatedly invoking the compiler within one process by... (ipa_reference_c_finalize): New function. * ipa-reference.h (ipa_reference_c_finalize): New. * main.c (main): Replace invocation of toplev_main with construction of a toplev instance, and call its main method. * params.c (global_init_params): Add an assert that params_finished is false. (params_c_finalize): New. * params.h (params_c_finalize): New. * passes.c (execute_ipa_summary_passes): Set current_pass before invoking generate_summary, for the benefit of pass_ipa_pure_const. (ipa_write_summaries_2): Assign pass to current_pass global before calling write_summary hook. (ipa_write_optimization_summaries_1): Likewise when calling write_optimization_summary hook. (ipa_read_summaries_1): Likewise for read_summary hook. (ipa_read_optimization_summaries_1): Likewise for read_optimization_summary hook. (execute_ipa_stmt_fixups): Likewise. * stringpool.c (init_stringpool): Clean up if we're called more than once. * timevar.c (timevar_init): Ignore repeated calls. * toplev.c: Include dwarf2out.h, ipa-reference.h, gcse.h. (general_init): Reset input_location to UNKNOWN_LOCATION. (initialize_rtl): Move static local initialized_once into file scope, and rename to... (rtl_initialized): New variable. (do_compile): Move timevar initialization from here to toplev::start_timevars. (toplev::toplev, toplev::~toplev, toplev::start_timevars, toplev::finalize): New functions. (toplev_main): Rename to... (toplev::main): ...this. * toplev.h (class toplev): New class. 
--- gcc/cgraph.c | 14 +++ gcc/cgraph.h | 6 +++ gcc/cgraphunit.c | 20 - gcc/diagnostic.c | 11 - gcc/dwarf2out.c | 87 +++ gcc/dwarf2out.h | 2 + gcc/gcse.c | 9 gcc/gcse.h | 2 + gcc/ggc-page.c | 5 +++ gcc/input.c | 2 +- gcc/ipa-cp.c | 12 ++ gcc/ipa-pure-const.c | 113 +-- gcc/ipa-reference.c | 17 ++-- gcc/ipa-reference.h | 1 + gcc/main.c | 6 ++- gcc/params.c | 14 +++ gcc/params.h | 4 ++ gcc/passes.c | 6 +++ gcc/stringpool.c | 5 +++ gcc/timevar.c| 3 ++ gcc/toplev.c | 67 +++--- gcc/toplev.h | 19 - 22 files
[PATCH 3/5] timevar.h: Add an auto_timevar class
This is used in a couple of places in jit/jit-playback.c to ensure that we pop the timevar on every exit path from a function. I could rewrite them if need be, but it does simplify things. Written by Tom Tromey. gcc/ChangeLog: * timevar.h (class auto_timevar): New class. --- gcc/timevar.h | 24 1 file changed, 24 insertions(+) diff --git a/gcc/timevar.h b/gcc/timevar.h index 6703cc9..f018e39 100644 --- a/gcc/timevar.h +++ b/gcc/timevar.h @@ -110,6 +110,30 @@ timevar_pop (timevar_id_t tv) timevar_pop_1 (tv); } +// This is a simple timevar wrapper class that pushes a timevar in its +// constructor and pops the timevar in its destructor. +class auto_timevar +{ + public: + auto_timevar (timevar_id_t tv) +: m_tv (tv) + { +timevar_push (m_tv); + } + + ~auto_timevar () + { +timevar_pop (m_tv); + } + + private: + + // Private to disallow copies. + auto_timevar (const auto_timevar &); + + timevar_id_t m_tv; +}; + extern void print_time (const char *, long); #endif /* ! GCC_TIMEVAR_H */ -- 1.8.5.3
Re: PING: Re: [patch] tag ../include/*
On 10/13/14 11:29, Aldy Hernandez wrote: On 10/13/14 09:27, Jeff Law wrote: On 10/13/14 08:53, Aldy Hernandez wrote: PING On 10/07/14 09:37, Aldy Hernandez wrote: Is there a reason we don't create etags for toplevel include files? If not, could I please apply this patch? ENOPATCH jeff Sorry, patch was in original message. Attached here. Ok for the trunk. Sorry it got dropped. jeff
Re: [PATCH 2/3] PR other/61321 - demangler crash on casts in template parameters
Ping. Jason, do you still think the special-case for conversion ops is inappropriate? -cary On Fri, Jul 25, 2014 at 2:16 AM, Pedro Alves pal...@redhat.com wrote: On 07/24/2014 11:35 PM, Cary Coutant wrote: It seems that the problem here is more general; a template argument list is not in scope within that same template argument list. Can't we fix that without special-casing conversion ops? I think conversion ops really are a special case. Thanks Cary. FWIW, I agree. (GDB 7.8 hasn't been released yet, though it's close. If this patch is approved as is, we'll be able to have the crash fixed there. If this requires a significant rewrite though, I'm afraid I might not be able to do it myself anytime soon.) It's the only case where the template parameters refer to the template argument list from the cast operator's enclosing template. In a cast expression, like anywhere else you might have a template parameter, the template parameter refers to the template argument list of the immediately enclosing template. I think this note from Section 5.1.3 (Operator Encodings) of the ABI is what makes this a special case (it's an informative comment in the document, but seems to me to be normative): For a user-defined conversion operator the result type (i.e., the type to which the operator converts) is part of the mangled name of the function. If the conversion operator is a member template, the result type will appear before the template parameters. There may be forward references in the result type to the template parameters. -- Thanks, Pedro Alves
Re: [gofrontend-dev] Re: [PATCH 03/13] HACK! Allow the static chain to be set from C
On Mon, Oct 13, 2014 at 1:10 AM, Richard Biener richard.guent...@gmail.com wrote: On Sat, Oct 11, 2014 at 6:23 AM, Richard Henderson r...@redhat.com wrote: On 10/10/2014 06:42 PM, Peter Collingbourne wrote: A colleague has suggested a perhaps nicer syntax: __builtin_call_chain(pointer, call) where call must be a call expression I like this. Unlike the other suggestions, it doesn't mess with the parsing of the regular part of the function call. And, depending on what point the builtin is lowered and applied to the AST, it might not require any parsing changes at all. I'll have a look at this next week. Thanks. Does the frontend know that the call expects a static chain? The chain is not part of the function type, so no. Peter
Re: [PATCH 1/X, i386, PR54232] Enable EBX for x86 in 32bits PIC code
On Mon, Oct 13, 2014 at 9:32 AM, Evgeny Stupachenko evstu...@gmail.com wrote: Reattached. On Mon, Oct 13, 2014 at 8:22 PM, Uros Bizjak ubiz...@gmail.com wrote: On Mon, Oct 13, 2014 at 4:53 PM, Evgeny Stupachenko evstu...@gmail.com wrote: ChangeLog for testsuite: 2014-10-13 Evgeny Stupachenko evstu...@gmail.com PR target/8340 PR middle-end/47602 PR rtl-optimization/55458 * gcc.target/i386/pic-1.c: Remove dg-error as test should pass now. * gcc.target/i386/pr55458.c: Likewise. * gcc.target/i386/pr47602.c: New. * gcc.target/i386/pr23098.c: Move to XFAIL. Reversed patch was attached. Please repost. Uros. This caused a regression: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63527 -- H.J.
Re: [PATCH] cleanup in c-parser
On 10/13/14 05:11, Anthony Brandon wrote: I updated the patch with a comment. Actually, Manuel handed me this patch just to help me get familiar with the process of submitting and testing. Generating this one with git diff looks different so I'm not sure if that's a problem or not. This version is fine. Please install on the trunk. Thanks, jeff
Re: [PATCH] Fix typo in comment for IRA
On 10/13/14 02:09, Kito Cheng wrote: Hi all: This patch contains lots of typo fixes for the IRA module, found by aspell :) ChangeLog 2014-10-13 Kito Cheng k...@0xlab.org * ira.c: Fix typo in comment. * ira.h: Ditto. * ira-build.c: Ditto. * ira-color.c: Ditto. * ira-emit.c: Ditto. * ira-int.h: Ditto. * ira-lives.c: Ditto. This is fine for the trunk. Please install. Thanks, Jeff
Re: [PATCH] Fix typo in comment for IRA
On Mon, 13 Oct 2014, Kito Cheng wrote: - -1 if it is not a cost classe. */ + -1 if it is not a cost classes. */ a cost class, no plural here. -- Marc Glisse
Re: [PATCH] move many gc hashtab to hash_table
On Mon, Oct 13, 2014 at 09:02:44AM -0700, H.J. Lu wrote: On Mon, Oct 13, 2014 at 8:33 AM, H.J. Lu hjl.to...@gmail.com wrote: On Fri, Oct 10, 2014 at 1:44 PM, Trevor Saunders tsaund...@mozilla.com wrote: On Fri, Oct 10, 2014 at 02:26:00PM -0600, Jeff Law wrote: On 10/06/14 21:24, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This changes almost all of the ggc htab that don't use the if_marked option to be hash_tables. I added a for_user gty attribute so that types could be used from user marking routines without either using the mangled names or being referenced in a template argument which was previously the only way to get gengtype to emit user callable marking routines. bootstrapped + regtested on x86_64-unknown-linux-gnu, ran make all-gcc for ppc64-linux, ppc64-apple-darwin, mips-elf, m32c-elf, mep-elf, and x86_64-apple-darwin. I haven't heard back yet on my request for a cfarm account so I haven't tried bootstrapping on other archs, but more testing is most welcome. Ok? Trev gcc/ * asan.c, cfgloop.c, cfgloop.h, cgraph.c, cgraph.h, config/darwin.c, config/m32c/m32c.c, config/mep/mep.c, config/mips/mips.c, config/rs6000/rs6000.c, dwarf2out.c, function.c, function.h, gimple-ssa.h, libfuncs.h, optabs.c, output.h, rtl.h, sese.c, symtab.c, tree-cfg.c, tree-dfa.c, tree-ssa.c, varasm.c: Use hash-table instead of hashtab. * doc/gty.texi (for_user): Document new option. * gengtype.c (create_user_defined_type): Don't try to get a struct for char. (walk_type): Don't error out on for_user option. (write_func_for_structure): Emit user marking routines if requested by for_user option. (write_local_func_for_structure): Likewise. (main): Mark types with for_user option as used. * ggc.h (gt_pch_nx): Add overload for unsigned int. * hash-map.h (hash_map::hash_entry::pch_nx_helper): AddOverloads. * hash-table.h (ggc_hasher): New struct. (hash_table::create_ggc): New function. (gt_pch_nx): New overload for hash_table. 
java/ * class.c, decl.c, except.c, expr.c, java-tree.h, lang.c: Use hash_table instead of hashtab. objc/ * objc-act.c: use hash_table instead of hashtab. cp/ * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c, pt.c, semantics.c, tree.c, typeck2.c: Use hash_table instead of hashtab. fortran/ * trans-decl.c, trans.c, trans.h: Use hash_table instead of hashtab. c-family/ * c-common.c: Use hash_table instead of hashtab. Whee, meaning we no longer have to pass through void * pointers and the visually ugly casting that requires in the callbacks?!? Yea! yeah, you can even use an iterator now if you like -++*((unsigned HOST_WIDE_INT *) data); +++* data; I think coding standards require no space here. Obviously a nit. oops, definitely should be fixed :) This is obviously a fairly mechanical change. I did some spot checking and it looks good. I don't expect any fallout. Well, its large enough I expect *something* but I doubt it'll be a big deal. Good for the trunk. thanks for the review, I know it was rather big. This breaks bootstrap on Linux/x86: https://gcc.gnu.org/ml/gcc-regression/2014-10/msg00237.html This patch fixes it. This is what I checked in. Sorry for the bustage, and thank you! Trev -- H.J. --- Index: ChangeLog === --- ChangeLog (revision 216150) +++ ChangeLog (working copy) @@ -1,3 +1,9 @@ +2014-10-13 H.J. Lu hongjiu...@intel.com + + * mangle.c (mangle_conv_op_name_for_type): Cast elements to + unsigned long. + (print_template_statistics): Cast size and elements to long. + 2014-10-12 Trevor Saunders tsaund...@mozilla.com * cp-gimplify.c, cp-tree.h, decl.c, mangle.c, name-lookup.c, Index: mangle.c === --- mangle.c (revision 216150) +++ mangle.c (working copy) @@ -3744,7 +3744,8 @@ mangle_conv_op_name_for_type (const tree char buffer[64]; /* Create a unique name corresponding to TYPE. 
*/ - sprintf (buffer, operator %lu, conv_type_names-elements ()); + sprintf (buffer, operator %lu, + (unsigned long) conv_type_names-elements ()); identifier = get_identifier (buffer); *slot = identifier; Index: pt.c === --- pt.c (revision 216150) +++ pt.c (working copy) @@ -22324,12 +22324,12 @@ void print_template_statistics (void) { fprintf (stderr, decl_specializations: size %ld, %ld elements, - %f collisions\n, decl_specializations-size (), - decl_specializations-elements (), + %f collisions\n, (long)
[c++-concepts] cleanup expressions
Sometimes, cleanup_point_exprs are being added to concept definitions. This patch allows that to happen, but removes the cleanup point during normalization. 2014-10-13 Andrew Sutton andrew.n.sut...@gmail.com Fix bug related to cleanup expressions in concept definitions. * gcc/cp/constraint.cc (check_function_concept): See through cleanup handlers when checking the body of a function. (normalize_cast): Removed. Handled in a default case. (normalize_cleanup_point): New. Normalize the expression without the cleanup handler. Andrew Sutton
Re: [c++-concepts] cleanup expressions
And here's the patch: Andrew Sutton On Mon, Oct 13, 2014 at 3:33 PM, Andrew Sutton andrew.n.sut...@gmail.com wrote: Sometimes, cleanup_point_exprs are being added to concept definitions. This patch allows that to happen, but removes the cleanup point during normalization. 2014-10-13 Andrew Sutton andrew.n.sut...@gmail.com Fix bug related to cleanup expressions in concept definitions. * gcc/cp/constraint.cc (check_function_concept): See through cleanup handlers when checking the body of a function. (normalize_cast): Removed. Handled in a default case. (normalize_cleanup_point): New. Normalize the expression without the cleanup handler. Andrew Sutton Index: constraint.cc === --- constraint.cc (revision 215720) +++ constraint.cc (working copy) @@ -285,6 +285,14 @@ check_function_concept (tree fn) tree body = DECL_SAVED_TREE (fn); if (TREE_CODE (body) == BIND_EXPR) body = BIND_EXPR_BODY (body); + + // Sometimes a funciton call results the creation of clean up + // points. Allow these to be preserved in the body of the + // constraint, as we might actually need them for some constexpr + // evaluations. + if (TREE_CODE (body) == CLEANUP_POINT_EXPR) +body = TREE_OPERAND(body, 0); + if (TREE_CODE (body) != RETURN_EXPR) error_at (loc, function concept definition %qD has multiple statements, fn); @@ -316,9 +324,9 @@ tree normalize_expr_req (tree); tree normalize_type_req (tree); tree normalize_nested_req (tree); tree normalize_var (tree); +tree normalize_cleanup_point (tree); tree normalize_template_id (tree); tree normalize_stmt_list (tree); -tree normalize_cast (tree); tree normalize_atom (tree); // Reduce the requirement T into a logical formula written in terms of @@ -383,12 +391,12 @@ normalize_expr (tree t) case TEMPLATE_ID_EXPR: return normalize_template_id (t); -case CAST_EXPR: - return normalize_cast (t); - case BIND_EXPR: return normalize_node (BIND_EXPR_BODY (t)); +case CLEANUP_POINT_EXPR: + return normalize_cleanup_point (t); + // Do not recurse. 
case TAG_DEFN: return NULL_TREE; @@ -655,12 +663,11 @@ normalize_requires (tree t) return t; } -// Normalize a cast expression. +// Normalize a cleanup point by normalizing the underlying +// expression. tree -normalize_cast (tree t) -{ - // return normalize_node (TREE_VALUE (TREE_OPERAND (t, 0))); - return normalize_atom (t); +normalize_cleanup_point (tree t) { + return normalize_node (TREE_OPERAND (t, 0)); } // Normalize an atomic expression by performing some basic checks.
[c++-concepts] introduction syntax regression
The original patch for concept introductions was not popping a deferred access check. This fixes that problem, although I'm not sure if we need to defer access checks at all. 2014-10-13 Andrew Sutton andrew.n.sut...@gmail.com Fix regression related to concept introductions. * gcc/cp/constraint.cc (cp_parser_template_declaration_after_exp): Pop deferred access checks after parsing the introduction. Andrew Sutton Index: parser.c === --- parser.c (revision 214991) +++ parser.c (working copy) @@ -24409,19 +24409,21 @@ cp_parser_template_declaration_after_exp = current_template_reqs; } } - else if(flag_concepts) + else if (flag_concepts) { need_lang_pop = false; checks = NULL; saved_template_reqs = release (current_template_reqs); - push_deferring_access_checks (dk_deferred); // Scope may be changed by a nested-name-specifier. tree saved_scope = parser->scope; tree saved_qualifying_scope = parser->qualifying_scope; tree saved_object_scope = parser->object_scope; + push_deferring_access_checks (dk_deferred); parameter_list = cp_parser_template_introduction (parser); + pop_deferring_access_checks (); + if (parameter_list == error_mark_node) { // Restore template requirements before returning.
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [14/n] Optimize constant checks
On 09 Oct 11:13, Jeff Law wrote: On 10/08/14 13:21, Ilya Enkovich wrote: Hi, This patch adds a removal of checks known to always pass into checker optimization. Thanks, Ilya -- 2014-10-08 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp.c (chkp_remove_check_if_pass): New. (chkp_remove_constant_checks): New. (chkp_opt_execute): Run constant check removal algorithm. So again, I'd like to see all the optimization stuff pulled into its own file and and basic tests that we can use for smoke testing now and in the future. + else if (result == -1) +{ + if (dump_file (dump_flags TDF_DETAILS)) +fprintf (dump_file, action: keep check (always fail)\n); +} ISTM this case should generate a compile-time warning. We've just determined statically that this test is always going to fail, right? + /* Iterate throw all found checks in BB. */ s/throw/through/ With the changes above, this will be OK for the trunk. Jeff Thanks for review! Here is a version with a warning and a couple of tests added. Ilya -- gcc/ 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * tree-chkp-opt.c: Include diagnostic.h. (chkp_remove_check_if_pass): New. (chkp_remove_constant_checks): New. (chkp_opt_execute): Run constant check removal algorithm. * c-family/c.opt (Wchkp): New. gcc/testsuite/ 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * gcc.target/i386/chkp-const-check-1.c: New. * gcc.target/i386/chkp-const-check-2.c: New. 
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 1ca5a95..5202e3c 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -323,6 +323,10 @@ Wchar-subscripts C ObjC C++ ObjC++ Var(warn_char_subscripts) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn about subscripts whose type is \char\ +Wchkp +C ObjC C++ ObjC++ Var(warn_chkp) Warning EnabledBy(Wall) +Warn about memory access errors found by Pointer Bounds Checker + Wclobbered C ObjC C++ ObjC++ Var(warn_clobbered) Warning EnabledBy(Wextra) Warn about variables that might be changed by \longjmp\ or \vfork\ diff --git a/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c b/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c new file mode 100644 index 000..8c90239 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-const-check-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp -O2 -fdump-tree-chkpopt } */ +/* { dg-final { scan-tree-dump-not bndcl chkpopt } } */ +/* { dg-final { scan-tree-dump-not bndcu chkpopt } } */ + + +int test (int *p) +{ + p = (int *)__builtin___bnd_set_ptr_bounds (p, sizeof (int)); + return *p; +} diff --git a/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c b/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c new file mode 100644 index 000..ab573eb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/chkp-const-check-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options -fcheck-pointer-bounds -mmpx -fdump-tree-chkp -O2 -Wchkp } */ + +int test (int *p) +{ + p = (int *)__builtin___bnd_set_ptr_bounds (p, sizeof (int)); + return *(p + 1); /* { dg-warning memory access check always fail } */ +} diff --git a/gcc/tree-chkp-opt.c b/gcc/tree-chkp-opt.c index 620df47..5112769 100644 --- a/gcc/tree-chkp-opt.c +++ b/gcc/tree-chkp-opt.c @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. 
If not see #include gimplify-me.h #include expr.h #include tree-chkp.h +#include diagnostic.h enum check_type { @@ -693,6 +694,48 @@ chkp_get_check_result (struct check_info *ci, tree bounds) return res; } +/* Try to compare bounds value and address value + used in the check CI. If we can prove that check + always pass then remove it. */ +static void +chkp_remove_check_if_pass (struct check_info *ci) +{ + int result = 0; + + if (dump_file (dump_flags TDF_DETAILS)) +{ + fprintf (dump_file, Trying to remove check: ); + print_gimple_stmt (dump_file, ci-stmt, 0, 0); +} + + result = chkp_get_check_result (ci, ci-bounds); + + if (result == 1) +{ + gimple_stmt_iterator i = gsi_for_stmt (ci-stmt); + + if (dump_file (dump_flags TDF_DETAILS)) + fprintf (dump_file, action: delete check (always pass)\n); + + gsi_remove (i, true); + unlink_stmt_vdef (ci-stmt); + release_defs (ci-stmt); + ci-stmt = NULL; +} + else if (result == -1) +{ + if (dump_file (dump_flags TDF_DETAILS)) + fprintf (dump_file, action: keep check (always fail)\n); + warning_at (gimple_location (ci-stmt), OPT_Wchkp, + memory access check always fail); +} + else if (result == 0) +{ + if (dump_file (dump_flags TDF_DETAILS)) + fprintf (dump_file, action: keep check (cannot compute result)\n); +} +} + /* For bounds used in CI check if bounds are produced by intersection and we may use
Re: [Patch] Fix PR61889 for the w64-mingw32 case
Honza, not sure if this patch is ideal, but this will unblock mingw build problems. Can this one get in? thanks, David On Wed, Sep 24, 2014 at 8:22 AM, Rainer Emrich rai...@emrich-ebersheim.de wrote: The following patch fixes PR61889 for x86_64-w64-mingw32. Details can be found on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61889 The patch was bootstrapped on x86_64-w64-mingw32. If the patch is ok, Kai would you apply, please? Rainer 2014-09-24 Rainer Emrich rai...@emrich-ebersheim.de PR gcov-profile/61889 * gcc/gcov-tool.c: Remove wrong #if !defined(_WIN32) * libgcc/libgcov-driver-system.c: undefine clashing macro for mkdir Index: gcc/gcov-tool.c === --- gcc/gcov-tool.c (Revision 215554) +++ gcc/gcov-tool.c (Arbeitskopie) @@ -89,11 +89,7 @@ gcov_output_files (const char *out, stru /* Try to make directory if it doesn't already exist. */ if (access (out, F_OK) == -1) { -#if !defined(_WIN32) if (mkdir (out, S_IRWXU | S_IRWXG | S_IRWXO) == -1 errno != EEXIST) -#else - if (mkdir (out) == -1 errno != EEXIST) -#endif fatal_error (Cannot make directory %s, out); } else unlink_profile_dir (out); Index: libgcc/libgcov-driver-system.c === --- libgcc/libgcov-driver-system.c (Revision 215554) +++ libgcc/libgcov-driver-system.c (Arbeitskopie) @@ -66,6 +66,9 @@ create_file_directory (char *filename) #ifdef TARGET_POSIX_IO mkdir (filename, 0755) == -1 #else +#ifdef mkdir +#undef mkdir +#endif mkdir (filename) == -1 #endif /* The directory might have been made by another process. */
Re: RFA: fix mode confusion in caller-save.c:replace_reg_with_saved_mem
On 10/11/14 03:32, Joern Rennecke wrote: On 10 October 2014 21:13, Jeff Law l...@redhat.com wrote: ... ISTM it would be better to find the mode of the same class that corresponds to GET_MODE_SIZE (mode) / nregs. In your case that's obviously QImode :-) Like this? Or did you mean to remove the save_mode[regno] use altogether? I can think of arguments for or against, but got no concrete examples for either. Yea, that's basically what I was thinking. I probably wouldn't have bothered with the if (hard_regno ...) check, but I can see why you might want that added measure of safety before slamming in a new mode. I think you want smode in the mode_for_size call rather than mode, right (both instances)? If that fixes your problem and passes the usual bootstrap and regression test, then it's OK with me. I can see Richard S. getting in here one day and saying, umm, this all needs further refinement, but at least this hunk of code is in better shape. jeff
Re: [C++] Handle || ! for simd vectors
Ping https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00361.html (sorry that my message looked like I had committed as obvious) On Sat, 4 Oct 2014, Marc Glisse wrote: On Thu, 2 Oct 2014, Jason Merrill wrote: OK. Thanks. While committing, I noticed that I restricted ! to integer vectors, whereas it seems to work just fine with scalar floats, so it would make sense to extend it to float vectors. Tested on x86_64-linux-gnu. 2014-10-04 Marc Glisse marc.gli...@inria.fr gcc/cp/ * typeck.c (cp_build_unary_op) [TRUTH_NOT_EXPR]: Accept float vectors. gcc/testsuite/ * g++.dg/ext/vector9.C: Test ! with float vectors. -- Marc Glisse
Re: [PATCH, Pointer Bounds Checker 14/x] Passes [2/n] IPA passes
On 10/13/14 04:48, Ilya Enkovich wrote: -- 2014-10-13 Ilya Enkovich ilya.enkov...@intel.com * ipa-chkp.c: New. * ipa-chkp.h: New. * Makefile.in (OBJS): Add ipa-chkp.o. * c-family/c-common.c (c_common_attribute_table): Fix documentation. (c_common_format_attribute_table): Likewise. OK for the trunk. jeff
Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition
On 10/11/14 06:44, Felix Yang wrote: Hello Jeff, I see that you have improved the RTL typesafety issue for ira.c, so I rebased this patch on the latest trunk and change to use the new list walking interface. Bootstrapped on x86_64-SUSE-Linux and make check regression tested. OK for trunk? OK for the trunk. Thanks for your patience. jeff
[PATCH] Handle CFA adjust notes in combine stack adjustments
I was hitting this during a i386 bootstrap, wherein csa managed to combine two insns that both had REG_CFA_ADJUST_CFA notes, but didn't modify the notes. This lead to an abort later in dwarf2cfi, when the inconsistency was noticed. I'm not quite sure why this was happening on my branch but not mainline, but I'm applying it here anyway. Tested on i686 and x86_64. r~ 2014-10-13 Richard Henderson r...@redhat.com * combine-stack-adj.c (no_unhandled_cfa): New. (maybe_merge_cfa_adjust): New. (combine_stack_adjustments_for_block): Use them. * g++.dg/torture/20141013.C: New. diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c index aebdf87..844873c 100644 --- a/gcc/combine-stack-adj.c +++ b/gcc/combine-stack-adj.c @@ -190,6 +190,44 @@ record_one_stack_ref (rtx_insn *insn, rtx *ref, struct csa_reflist *next_reflist return ml; } +/* We only know how to adjust the CFA; no other frame-related changes + may appear in any insn to be deleted. */ + +static bool +no_unhandled_cfa (rtx_insn *insn) +{ + if (!RTX_FRAME_RELATED_P (insn)) +return true; + + /* No CFA notes at all is a legacy interpretation like + FRAME_RELATED_EXPR, and is context sensitive within + the prologue state machine. We can't handle that here. */ + bool has_cfa_adjust = false; + + for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1)) +switch (REG_NOTE_KIND (link)) + { + default: +break; + case REG_CFA_ADJUST_CFA: + has_cfa_adjust = true; + break; + + case REG_FRAME_RELATED_EXPR: + case REG_CFA_DEF_CFA: + case REG_CFA_OFFSET: + case REG_CFA_REGISTER: + case REG_CFA_EXPRESSION: + case REG_CFA_RESTORE: + case REG_CFA_SET_VDRAP: + case REG_CFA_WINDOW_SAVE: + case REG_CFA_FLUSH_QUEUE: + return false; + } + + return has_cfa_adjust; +} + /* Attempt to apply ADJUST to the stack adjusting insn INSN, as well as each of the memories and stack references in REFLIST. Return true on success. 
*/ @@ -320,6 +358,44 @@ maybe_move_args_size_note (rtx_insn *last, rtx_insn *insn, bool after) add_reg_note (last, REG_ARGS_SIZE, XEXP (note, 0)); } +/* Merge any REG_CFA_ADJUST_CFA note from SRC into DST. + AFTER is true iff DST follows SRC in the instruction stream. */ + +static void +maybe_merge_cfa_adjust (rtx_insn *dst, rtx_insn *src, bool after) +{ + rtx snote = NULL, dnote = NULL; + rtx sexp, dexp; + rtx exp1, exp2; + + if (RTX_FRAME_RELATED_P (src)) +snote = find_reg_note (src, REG_CFA_ADJUST_CFA, NULL_RTX); + if (snote == NULL) +return; + sexp = XEXP (snote, 0); + + if (RTX_FRAME_RELATED_P (dst)) +dnote = find_reg_note (dst, REG_CFA_ADJUST_CFA, NULL_RTX); + if (dnote == NULL) +{ + add_reg_note (dst, REG_CFA_ADJUST_CFA, sexp); + return; +} + dexp = XEXP (dnote, 0); + + gcc_assert (GET_CODE (sexp) == SET); + gcc_assert (GET_CODE (dexp) == SET); + + if (after) +exp1 = dexp, exp2 = sexp; + else +exp1 = sexp, exp2 = dexp; + + SET_SRC (exp1) = simplify_replace_rtx (SET_SRC (exp1), SET_DEST (exp2), +SET_SRC (exp2)); + XEXP (dnote, 0) = exp1; +} + /* Return the next (or previous) active insn within BB. */ static rtx_insn * @@ -491,12 +567,15 @@ combine_stack_adjustments_for_block (basic_block bb) /* Combine an allocation into the first instruction. */ if (STACK_GROWS_DOWNWARD ? this_adjust = 0 : this_adjust = 0) { - if (try_apply_stack_adjustment (last_sp_set, reflist, - last_sp_adjust + this_adjust, - this_adjust)) + if (no_unhandled_cfa (insn) + try_apply_stack_adjustment (last_sp_set, reflist, +last_sp_adjust ++ this_adjust, +this_adjust)) { /* It worked! */ maybe_move_args_size_note (last_sp_set, insn, false); + maybe_merge_cfa_adjust (last_sp_set, insn, false); delete_insn (insn); last_sp_adjust += this_adjust; continue; @@ -508,12 +587,15 @@ combine_stack_adjustments_for_block (basic_block bb) else if (STACK_GROWS_DOWNWARD ? 
last_sp_adjust = 0 : last_sp_adjust = 0) { - if (try_apply_stack_adjustment (insn, reflist, - last_sp_adjust + this_adjust, - -last_sp_adjust)) + if (no_unhandled_cfa (last_sp_set) + try_apply_stack_adjustment (insn, reflist