[PATCH] Force rtl templates to be inlined
From: Andi Kleen a...@linux.intel.com I noticed that with the trunk compiler a range of the new rtl inlines show up as hot in a profiler during stage1. I think that happens because stage1 is not using optimization and does not inline plain inline. And these rtl inlines are very frequently called. Mark them all with __attribute__((always_inline)) which forces inlining even with -O0. Passes bootstrap and testing on x86_64-linux. Cc: dmalc...@redhat.com include/: 2014-09-01 Andi Kleen a...@linux.intel.com * ansidecl.h (ALWAYS_INLINE): Add. gcc/: 2014-09-01 Andi Kleen a...@linux.intel.com * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE. (rhs_regno): Dito. (init_costs_to_max): Dito. (init_costs_to_zero): Dito. (costs_lt_p): Dito. (costs_add_n_insns): Dito. (wi::int_traits ::get_precision): Dito. (wi::shwi): Dito. (wi::min_value): Dito. (wi::max_value): Dito. (set_rtx_cost): Dito. (get_full_set_rtx_cost): Dito. (set_src_cost): Dito. (get_full_set_src_cost): Dito. (get_mem_attrs): Dito. --- gcc/rtl.h | 111 +++-- include/ansidecl.h | 6 +++ 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/gcc/rtl.h b/gcc/rtl.h index beeed2f..d711e43 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. 
If not see #define GCC_RTL_H #include utility +#include ansidecl.h #include statistics.h #include machmode.h #include input.h @@ -418,7 +419,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_expr_list *::test (rtx rt) { return rt-code == EXPR_LIST; @@ -447,7 +448,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_insn_list *::test (rtx rt) { return rt-code == INSN_LIST; @@ -474,7 +475,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_sequence *::test (rtx rt) { return rt-code == SEQUENCE; @@ -482,7 +483,7 @@ is_a_helper rtx_sequence *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper const rtx_sequence *::test (const_rtx rt) { return rt-code == SEQUENCE; @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def { template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_insn *::test (rtx rt) { return (INSN_P (rt) @@ -790,7 +791,7 @@ is_a_helper rtx_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper const rtx_insn *::test (const_rtx rt) { return (INSN_P (rt) @@ -802,7 +803,7 @@ is_a_helper const rtx_insn *::test (const_rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_debug_insn *::test (rtx rt) { return DEBUG_INSN_P (rt); @@ -810,7 +811,7 @@ is_a_helper rtx_debug_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_nonjump_insn *::test (rtx rt) { return NONJUMP_INSN_P (rt); @@ -818,7 +819,7 @@ is_a_helper rtx_nonjump_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_insn *::test (rtx rt) { return JUMP_P (rt); @@ -826,7 +827,7 @@ is_a_helper rtx_jump_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_call_insn *::test (rtx rt) { return CALL_P (rt); @@ -834,7 +835,7 @@ is_a_helper rtx_call_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper 
rtx_call_insn *::test (rtx_insn *insn) { return CALL_P (insn); @@ -842,7 +843,7 @@ is_a_helper rtx_call_insn *::test (rtx_insn *insn) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_table_data *::test (rtx rt) { return JUMP_TABLE_DATA_P (rt); @@ -850,7 +851,7 @@ is_a_helper rtx_jump_table_data *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_table_data *::test (rtx_insn *insn) { return JUMP_TABLE_DATA_P (insn); @@ -858,7 +859,7 @@ is_a_helper rtx_jump_table_data *::test (rtx_insn *insn) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_barrier *::test (rtx rt) { return BARRIER_P (rt); @@ -866,7 +867,7 @@ is_a_helper rtx_barrier *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_code_label *::test (rtx rt) { return LABEL_P (rt); @@ -874,7 +875,7 @@ is_a_helper rtx_code_label *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_code_label *::test (rtx_insn *insn) { return LABEL_P (insn); @@ -882,7 +883,7 @@ is_a_helper rtx_code_label *::test (rtx_insn *insn) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_note *::test (rtx rt) { return NOTE_P (rt); @@ -890,7 +891,7 @@ is_a_helper rtx_note *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool
Re: [PATCH] Force rtl templates to be inlined
On Tue, Sep 2, 2014 at 12:03 AM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com I noticed that with the trunk compiler a range of the new rtl inlines show up as hot in a profiler during stage1. I think that happens because stage1 is not using optimization and does not inline plain inline. And these rtl inlines are very frequently called. Mark them all with __attribute__((always_inline)) which forces inlining even with -O0. I think this is wrong and should not be committed. stage1 is designed to be without optimization and there have been bugs in the past in the area of always_inline too. Thanks, Andrew Pinski Passes bootstrap and testing on x86_64-linux. Cc: dmalc...@redhat.com include/: 2014-09-01 Andi Kleen a...@linux.intel.com * ansidecl.h (ALWAYS_INLINE): Add. gcc/: 2014-09-01 Andi Kleen a...@linux.intel.com * rtl.h (is_a_helper): Change inline to ALWAYS_INLINE. (rhs_regno): Dito. (init_costs_to_max): Dito. (init_costs_to_zero): Dito. (costs_lt_p): Dito. (costs_add_n_insns): Dito. (wi::int_traits ::get_precision): Dito. (wi::shwi): Dito. (wi::min_value): Dito. (wi::max_value): Dito. (set_rtx_cost): Dito. (get_full_set_rtx_cost): Dito. (set_src_cost): Dito. (get_full_set_src_cost): Dito. (get_mem_attrs): Dito. --- gcc/rtl.h | 111 +++-- include/ansidecl.h | 6 +++ 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/gcc/rtl.h b/gcc/rtl.h index beeed2f..d711e43 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. 
If not see #define GCC_RTL_H #include utility +#include ansidecl.h #include statistics.h #include machmode.h #include input.h @@ -418,7 +419,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_expr_list *::test (rtx rt) { return rt-code == EXPR_LIST; @@ -447,7 +448,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_insn_list *::test (rtx rt) { return rt-code == INSN_LIST; @@ -474,7 +475,7 @@ public: template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_sequence *::test (rtx rt) { return rt-code == SEQUENCE; @@ -482,7 +483,7 @@ is_a_helper rtx_sequence *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper const rtx_sequence *::test (const_rtx rt) { return rt-code == SEQUENCE; @@ -778,7 +779,7 @@ struct GTY(()) rtvec_def { template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_insn *::test (rtx rt) { return (INSN_P (rt) @@ -790,7 +791,7 @@ is_a_helper rtx_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper const rtx_insn *::test (const_rtx rt) { return (INSN_P (rt) @@ -802,7 +803,7 @@ is_a_helper const rtx_insn *::test (const_rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_debug_insn *::test (rtx rt) { return DEBUG_INSN_P (rt); @@ -810,7 +811,7 @@ is_a_helper rtx_debug_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_nonjump_insn *::test (rtx rt) { return NONJUMP_INSN_P (rt); @@ -818,7 +819,7 @@ is_a_helper rtx_nonjump_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_insn *::test (rtx rt) { return JUMP_P (rt); @@ -826,7 +827,7 @@ is_a_helper rtx_jump_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_call_insn *::test (rtx rt) { return CALL_P (rt); @@ -834,7 +835,7 @@ is_a_helper rtx_call_insn *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper 
rtx_call_insn *::test (rtx_insn *insn) { return CALL_P (insn); @@ -842,7 +843,7 @@ is_a_helper rtx_call_insn *::test (rtx_insn *insn) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_table_data *::test (rtx rt) { return JUMP_TABLE_DATA_P (rt); @@ -850,7 +851,7 @@ is_a_helper rtx_jump_table_data *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_jump_table_data *::test (rtx_insn *insn) { return JUMP_TABLE_DATA_P (insn); @@ -858,7 +859,7 @@ is_a_helper rtx_jump_table_data *::test (rtx_insn *insn) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_barrier *::test (rtx rt) { return BARRIER_P (rt); @@ -866,7 +867,7 @@ is_a_helper rtx_barrier *::test (rtx rt) template template -inline bool +ALWAYS_INLINE bool is_a_helper rtx_code_label *::test (rtx rt) { return LABEL_P (rt); @@ -874,7 +875,7 @@ is_a_helper rtx_code_label *::test (rtx rt) template template -inline
Re: [PATCH] Force rtl templates to be inlined
there have been bugs in the past in the area of always_inline too. You're arguing for my patch. It would find those bugs. -Andi
Re: [PATCH] Force rtl templates to be inlined
On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen a...@firstfloor.org wrote: there have been bugs in the past in the area of always_inline too. You're arguing for my patch. It would find those bugs. No, I am arguing against it, since we cannot change the older versions of GCC. Thanks, Andrew -Andi
Re: [PATCH] gcc-ar: Turn plugin not found case into a warning
On Mon, Sep 1, 2014 at 6:33 PM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com Only give a warning when gcc-ar/nm/ranlib cannot find the plugin. In this case do not pass a plugin argument to the wrapped program. This should make it work on non linker plugin systems, so that the build system can use it unconditionally. Hmm, maybe conditionalize the error on HAVE_LTO_PLUGIN == 2 instead? And not warn for HAVE_LTO_PLUGIN == 0 at all? Richard. gcc/: 2014-09-01 Andi Kleen a...@linux.intel.com * gcc-ar (main): Only warn when plugin not found. --- gcc/gcc-ar.c | 27 --- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/gcc/gcc-ar.c b/gcc/gcc-ar.c index fdff89c..e27ea3b 100644 --- a/gcc/gcc-ar.c +++ b/gcc/gcc-ar.c @@ -182,8 +182,8 @@ main (int ac, char **av) plugin = find_a_file (target_path, LTOPLUGINSONAME, R_OK); if (!plugin) { - fprintf (stderr, %s: Cannot find plugin '%s'\n, av[0], LTOPLUGINSONAME); - exit (1); + fprintf (stderr, %s: Warning: Cannot find plugin '%s'\n, av[0], LTOPLUGINSONAME); + /* Fall back to not using a plugin. */ } /* Find the wrapped binutils program. */ @@ -204,15 +204,20 @@ main (int ac, char **av) } /* Create new command line with plugin */ - nargv = XCNEWVEC (const char *, ac + 4); - nargv[0] = exe_name; - nargv[1] = --plugin; - nargv[2] = plugin; - if (is_ar av[1] av[1][0] != '-') -av[1] = concat (-, av[1], NULL); - for (k = 1; k ac; k++) -nargv[2 + k] = av[k]; - nargv[2 + k] = NULL; + if (plugin != NULL) +{ + nargv = XCNEWVEC (const char *, ac + 4); + nargv[0] = exe_name; + nargv[1] = --plugin; + nargv[2] = plugin; + if (is_ar av[1] av[1][0] != '-') +av[1] = concat (-, av[1], NULL); + for (k = 1; k ac; k++) +nargv[2 + k] = av[k]; + nargv[2 + k] = NULL; +} + else +nargv = CONST_CAST2 (const char **, char **, av); /* Run utility */ /* ??? the const is misplaced in pex_one's argv? */ -- 2.1.0
[PATCH] Fix PR62695
The auto_vec replacement missed one truncation. Committed as obvious. Richard. 2014-09-02 Richard Biener rguent...@suse.de PR tree-optimization/62695 * tree-ssa-structalias.c (find_func_clobbers): Add missing vector truncate. * gfortran.dg/pr62695.f90: New testcase. Index: gcc/tree-ssa-structalias.c === --- gcc/tree-ssa-structalias.c (revision 214795) +++ gcc/tree-ssa-structalias.c (working copy) @@ -5042,6 +5042,7 @@ find_func_clobbers (struct function *fn, get_constraint_for_address_of (arg, rhsc); FOR_EACH_VEC_ELT (rhsc, j, rhsp) process_constraint (new_constraint (lhs, *rhsp)); + rhsc.truncate (0); } /* Build constraints for propagating clobbers/uses along the Index: gcc/testsuite/gfortran.dg/pr62695.f90 === --- gcc/testsuite/gfortran.dg/pr62695.f90 (revision 0) +++ gcc/testsuite/gfortran.dg/pr62695.f90 (working copy) @@ -0,0 +1,27 @@ +! { dg-do compile } +! { dg-options -O -fipa-pta } + +MODULE dbcsr_dist_operations + TYPE dbcsr_mp_obj + END TYPE dbcsr_mp_obj + INTERFACE +SUBROUTINE dbcsr_mp_new(mp_env, pgrid, mp_group, mynode, numnodes, myprow, + mypcol) + IMPORT + TYPE(dbcsr_mp_obj), INTENT(OUT) :: mp_env + INTEGER, DIMENSION(0:, 0:), INTENT(IN) :: pgrid +END SUBROUTINE dbcsr_mp_new + END INTERFACE +CONTAINS + SUBROUTINE dbcsr_mp_make_env (mp_env, mp_group, + nprocs, pgrid_dims, error) +TYPE(dbcsr_mp_obj), INTENT(OUT) :: mp_env + OPTIONAL :: pgrid_dims +INTEGER :: error_handle, group, mynode, +numnodes, pcol, prow +INTEGER, ALLOCATABLE, DIMENSION(:, :):: pgrid +INTEGER, DIMENSION(2):: coord, myploc, npdims +CALL dbcsr_mp_new (mp_env, pgrid, group, mynode, numnodes, + myprow=myploc(1), mypcol=myploc(2)) + END SUBROUTINE dbcsr_mp_make_env +END MODULE dbcsr_dist_operations
Re: [PATCH] support ggc hash_map and hash_set
On Tue, Sep 2, 2014 at 3:56 AM, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, There are still some issues to make this work really nicely, but this part is probably good enough that it's worth reviewing. For one thing you can't use ggc hash_map or set in front ends with some types or gengtype will decide to put the overloads of the marking routines it provides in a front end file instead of the one it chose before, breaking other front ends. However that seems to be an unrelated issue; you can trigger it without using hash_map/set, so we might as well solve it separately. I had to have the entry marking functions for set delegate to the traits class because gcc 4.9.1 issues clearly bogus errors if you inline the code from the traits implementation. We may well want to make map work the same way at some point to enable some of the special GTY attributes like if_marked, but it doesn't seem to be necessary right now. bootstrapped + regtested without regressions on x86_64-unknown-linux-gnu, ok? Ok if you make the gcc_assert()s in the marking routines gcc_checking_assert()s. Btw - do manual markers need any special support for finalizers? Does the hash table need any special support to make finalizers efficient (avoid recording for each entry if stored in-place?) Thanks, Richard. Trev gcc/ChangeLog: 2014-09-01 Trevor Saunders tsaund...@mozilla.com * alloc-pool.c: Include coretypes.h. * cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c, function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and hash_set instead of htab. * ggc-page.c (in_gc): New variable. (ggc_free): Do nothing if a collection is taking place. (ggc_collect): Set in_gc appropriately. * ggc.h (gt_ggc_mx(const char *)): New function. (gt_pch_nx(const char *)): Likewise. (gt_ggc_mx(int)): Likewise. (gt_pch_nx(int)): Likewise. * hash-map.h (hash_map::hash_entry::ggc_mx): Likewise. (hash_map::hash_entry::pch_nx): Likewise. (hash_map::hash_entry::pch_nx_helper): Likewise. 
(hash_map::hash_map): Adjust. (hash_map::create_ggc): New function. (gt_ggc_mx): Likewise. (gt_pch_nx): Likewise. * hash-set.h (default_hashset_traits::ggc_mx): Likewise. (default_hashset_traits::pch_nx): Likewise. (hash_set::hash_entry::ggc_mx): Likewise. (hash_set::hash_entry::pch_nx): Likewise. (hash_set::hash_entry::pch_nx_helper): Likewise. (hash_set::hash_set): Adjust. (hash_set::create_ggc): New function. (hash_set::elements): Likewise. (gt_ggc_mx): Likewise. (gt_pch_nx): Likewise. * hash-table.h (hash_table::hash_table): Adjust. (hash_table::m_ggc): New member. (hash_table::~hash_table): Adjust. (hash_table::expand): Likewise. (hash_table::empty): Likewise. (gt_ggc_mx): New function. (hashtab_entry_note_pointers): Likewise. (gt_pch_nx): Likewise. diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c index 0d31835..bfaa0e4 100644 --- a/gcc/alloc-pool.c +++ b/gcc/alloc-pool.c @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see #include config.h #include system.h +#include coretypes.h #include alloc-pool.h #include hash-table.h #include hash-map.h diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 879899c..030a1c7 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1604,7 +1604,6 @@ struct cgraph_2node_hook_list; /* Map from a symbol to initialization/finalization priorities. */ struct GTY(()) symbol_priority_map { - symtab_node *symbol; priority_type init; priority_type fini; }; @@ -1872,7 +1871,7 @@ public: htab_t GTY((param_is (symtab_node))) assembler_name_hash; /* Hash table used to hold init priorities. */ - htab_t GTY ((param_is (symbol_priority_map))) init_priority_hash; + hash_mapsymtab_node *, symbol_priority_map *init_priority_hash; FILE* GTY ((skip)) dump_file; diff --git a/gcc/dbxout.c b/gcc/dbxout.c index 946f1d1..d856bdd 100644 --- a/gcc/dbxout.c +++ b/gcc/dbxout.c @@ -2484,12 +2484,9 @@ dbxout_expand_expr (tree expr) /* Helper function for output_used_types. Queue one entry from the used types hash to be output. 
*/ -static int -output_used_types_helper (void **slot, void *data) +bool +output_used_types_helper (tree const type, vectree *types_p) { - tree type = (tree) *slot; - vectree *types_p = (vectree *) data; - if ((TREE_CODE (type) == RECORD_TYPE || TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == QUAL_UNION_TYPE @@ -2502,7 +2499,7 @@ output_used_types_helper (void **slot, void *data) TREE_CODE (TYPE_NAME (type)) == TYPE_DECL) types_p-quick_push (TYPE_NAME (type)); - return 1; + return true; } /* This is a qsort callback which sorts types and declarations into a @@ -2544,8 +2541,9 @@ output_used_types (void) int i; tree
Re: [PATCH] Force rtl templates to be inlined
On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote: On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote: there have been bugs in the past in the area of always_inline too. You're arguing for my patch. It would find those bugs. No I am arguing against it since the older versions of GCC we cannot change. Should such bugs turn up, we can account for them in ansidecl.h. I think Andi's patch should go in. Ciao! Steven
Re: [PATCH] Force rtl templates to be inlined
On Sep 2, 2014, at 1:36 AM, Steven Bosscher stevenb@gmail.com wrote: On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote: On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote: there have been bugs in the past in the area of always_inline too. You're arguing for my patch. It would find those bugs. No I am arguing against it since the older versions of GCC we cannot change. Should such bugs turn up, we can account for them in ansidecl.h. I think Andi's patch should go in. It does hurt debuggability with older compilers too. So if we need to figure out why stage is being miscompiled it is harder to figure out how to work around it. I think stage should really be -O0 even with respect to inline. I think we should never force inline inside gcc even at -O0 as it is just a hack (we know it as we added the attribute in the first place). Thanks, Andrew Ciao! Steven
Re: [PINGv2][PATCH] Fix for PR 61875
On 09/01/2014 08:28 PM, Jakub Jelinek wrote: This situation occurs when somebody decides to build GCC with -fexceptions and -frtti which are forbidden for libsanitizer. I don't see a reason for this, simply don't do that, libsanitizer AFAIK isn't the only library where it is highly undesirable to have these flags in CXXFLAGS. libatomic and libgtm are other examples of libraries that shouldn't be compiled with those flags. Thanks, Jakub. Could someone close https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61875 as Invalid? -Y
Re: [PATCH] Force rtl templates to be inlined
On Tue, Sep 2, 2014 at 10:36 AM, Steven Bosscher stevenb@gmail.com wrote: On Tue, Sep 2, 2014 at 9:22 AM, Andrew Pinski wrote: On Tue, Sep 2, 2014 at 12:20 AM, Andi Kleen wrote: there have been bugs in the past in the area of always_inline too. You're arguing for my patch. It would find those bugs. No I am arguing against it since the older versions of GCC we cannot change. Should such bugs turn up, we can account for them in ansidecl.h. I think Andi's patch should go in. I disagree. always-inline isn't an optimization attribute but a correctness one. Instead we should not build stage1 with -O0 if we detect a reasonably recent GCC host compiler (say one that is still maintained). Or we simply should make -finline work at -O0 (I suppose it might already work?) and use it. Richard. Ciao! Steven
Ping^2 - RE: [PATCH] Add target hook to override DWARF2 frame register size
Ping^2 Added Jason as maintainer for dwarf related things. This hook will be used in the following patch: https://gcc.gnu.org/ml/gcc-patches/2014-08/msg02172.html Thanks, Matthew Ping. Thanks, Matthew Sent: 07 August 2014 07:21 Please don't add target macros. Add a hook if you must, but we're supposed to remove target macros, not add new ones :-) Thanks Steven, I wasn't sure if there were still things that were acceptable as macros. There's a lot to get rid of still. Updated patch using a target hook. I've opted to move the logic which handles part clobbered registers into the default implementation as that seemed natural. I have no real preference if others feel that is the wrong thing to do. This will be used by an up-coming patch for MIPS O32 ABI extensions. Bootstrapped and regtested on x86_64-linux-gnu. Thanks, Matthew gcc/ * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook. * targhooks.c (default_dwarf_frame_reg_mode): New function. * targhooks.h (default_dwarf_frame_reg_mode): New prototype. * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document. * doc/tm.texi: Regenerate. * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode selection logic to default_dwarf_frame_reg_mode. --- gcc/doc/tm.texi| 7 +++ gcc/doc/tm.texi.in | 2 ++ gcc/dwarf2cfi.c| 4 +--- gcc/target.def | 11 +++ gcc/targhooks.c| 13 + gcc/targhooks.h| 1 + 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index dd72b98..aa92ce4 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -6604,6 +6604,8 @@ the target supports DWARF 2 frame unwind information. 
@hook TARGET_DWARF_REGISTER_SPAN +@hook TARGET_DWARF_FRAME_REG_MODE + @hook TARGET_INIT_DWARF_REG_SIZES_EXTRA @hook TARGET_ASM_TTYPE diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c index 85cfb60..a673106 100644 --- a/gcc/dwarf2cfi.c +++ b/gcc/dwarf2cfi.c @@ -271,11 +271,9 @@ expand_builtin_init_dwarf_reg_sizes (tree address) if (rnum DWARF_FRAME_REGISTERS) { HOST_WIDE_INT offset = rnum * GET_MODE_SIZE (mode); - enum machine_mode save_mode = reg_raw_mode[i]; HOST_WIDE_INT size; + enum machine_mode save_mode = targetm.dwarf_frame_reg_mode (i); - if (HARD_REGNO_CALL_PART_CLOBBERED (i, save_mode)) - save_mode = choose_hard_reg_mode (i, 1, true); if (dnum == DWARF_FRAME_RETURN_COLUMN) { if (save_mode == VOIDmode) diff --git a/gcc/target.def b/gcc/target.def index 3a41db1..d5aba51 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3216,6 +3216,17 @@ If not defined, the default is to return @code{NULL_RTX}., rtx, (rtx reg), hook_rtx_rtx_null) +/* Given a register return the mode of the corresponding DWARF frame + register. */ +DEFHOOK +(dwarf_frame_reg_mode, + Given a register, this hook should return the mode which the\n\ +corresponding Dwarf frame register should have. This is normally\n\ +used to return a smaller mode than the raw mode to prevent call\n\ +clobbered parts of a register altering the frame register size., + enum machine_mode, (int regno), + default_dwarf_frame_reg_mode) + /* If expand_builtin_init_dwarf_reg_sizes needs to fill in table entries not corresponding directly to registers below FIRST_PSEUDO_REGISTER, this hook should generate the necessary diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 0f27a5a..765bf3b 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1456,6 +1456,19 @@ default_debug_unwind_info (void) return UI_NONE; } +/* Determine the correct mode for a Dwarf frame register that represents + register REGNO. 
*/ + +enum machine_mode +default_dwarf_frame_reg_mode (int regno) +{ + enum machine_mode save_mode = reg_raw_mode[regno]; + + if (HARD_REGNO_CALL_PART_CLOBBERED (regno, save_mode)) +save_mode = choose_hard_reg_mode (regno, 1, true); + return save_mode; +} + /* To be used by targets where reg_raw_mode doesn't return the right mode for registers used in apply_builtin_return and apply_builtin_arg. */ diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 4be33f8..fa88679 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -194,6 +194,7 @@ extern int default_label_align_max_skip (rtx); extern int default_jump_align_max_skip (rtx); extern section * default_function_section(tree decl, enum node_frequency freq, bool startup, bool exit); +extern enum machine_mode default_dwarf_frame_reg_mode (int); extern enum machine_mode default_get_reg_raw_mode (int); extern bool default_keep_leaf_when_profiled (); -- 1.9.4
[PATCH][match-and-simplify] Complete conversion patterns
This completes conversion patterns (apart from commented case which needs a new IL feature). Bootstrapped on x86_64-unknown-linux-gnu, applied. Richard. 2014-09-02 Richard Biener rguent...@suse.de * match-conversions.pd: Add more patterns. Index: gcc/match-conversions.pd === --- gcc/match-conversions.pd(revision 214795) +++ gcc/match-conversions.pd(working copy) @@ -1,21 +1,42 @@ -#if GIMPLE -/* Basic strip-useless-type-conversions. */ -(simplify - (convert @0) - (if (useless_type_conversion_p (type, TREE_TYPE (@0))) - @0)) -#endif - - /* From fold_unary in order of appearance. */ -#if GENERIC -/* For GIMPLE this is convered by the useless_type_conversion stripping. */ +/* Re-association barriers around constants and other re-association + barriers can be removed. */ (simplify - (convert @0) - (if (type == TREE_TYPE (@0)) - @0)) -#endif + (paren CONSTANT_CLASS_P@0) + @0) +(simplify + (paren (paren @0)) + (paren @0)) + +/* Basic strip-useless-type-conversions / strip_nops. */ +(for cvt in convert view_convert + (simplify + (cvt @0) + (if ((GIMPLE useless_type_conversion_p (type, TREE_TYPE (@0))) + || (GENERIC type == TREE_TYPE (@0))) + @0))) + +/* If we have (type) (a CMP b) and type is an integral type, return + new expression involving the new type. Canonicalize + (type) (a CMP b) to (a CMP b) ? (type) true : (type) false for + non-integral type. + Do not fold the result as that would not simplify further, also + folding again results in recursions. */ +/* ??? Eh, do we want sth like (define-ops cmp lt le eq ...) to not + repeat this too many times? */ +(for cmp in lt le eq ne ge gt unordered ordered unlt unle ungt unge uneq ltgt + (simplify + (convert (cmp@2 @0 @1)) + (if (TREE_CODE (type) == BOOLEAN_TYPE) +(cmp @0 @1)) + /* Not sure if the following makes sense for GIMPLE. 
*/ + (if (!INTEGRAL_TYPE_P (type) !VOID_TYPE_P (type) +TREE_CODE (type) != VECTOR_TYPE) +(cond @2 + { constant_boolean_node (true, type); } + { constant_boolean_node (false, type); } + /* Convert (T1)(~(T2)X) into ~(T1)X if T1 and T2 are integral types of the same precision, and X is an integer type not narrower than @@ -29,6 +50,41 @@ TYPE_PRECISION (type) = TYPE_PRECISION (TREE_TYPE (@1))) (bit_not (convert @1 +/* Convert (T1)(X * Y) into (T1)X * (T1)Y if T1 is narrower than the + type of X and Y (integer types only). */ +(simplify + (convert (mult @0 @1)) + (if (INTEGRAL_TYPE_P (type) + INTEGRAL_TYPE_P (TREE_TYPE (@0)) + TYPE_PRECISION (type) TYPE_PRECISION (TREE_TYPE (@0))) + (if (TYPE_OVERFLOW_WRAPS (type)) + (mult (convert @0) (convert @1) +#if 0 + /* 1) We can't handle the two-conversions-in-a-row below. + 2) We can't properly specify the type for the inner conversion +(unsigned_type_for). Suggested syntax below. */ + (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); } + (convert (mult (convert:utype @0) (convert:utype @1 +#endif + + +/* For integral conversions with the same precision or pointer + conversions use a NOP_EXPR instead. */ +(simplify + (view_convert @0) + (if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) +(INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) +TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))) + (convert @0))) + +/* Strip inner integral conversions that do not change the precision. */ +(simplify + (view_convert (convert@0 @1)) + (if ((INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) +(INTEGRAL_TYPE_P (TREE_TYPE (@1)) || POINTER_TYPE_P (TREE_TYPE (@1))) +(TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1 + (view_convert @1))) + /* From tree-ssa-forwprop.c:combine_conversions. */ @@ -64,12 +120,9 @@ handled below, if we are converting something to its own type via an object of identical or wider precision, neither conversion is needed. 
*/ - (if ( -#if GIMPLE - useless_type_conversion_p (type, inside_type) -#else - TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (inside_type) -#endif + (if (((GIMPLE useless_type_conversion_p (type, inside_type)) +|| (GENERIC + TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (inside_type))) (((inter_int || inter_ptr) final_int) || (inter_float final_float)) inter_prec = final_prec)
Re: [PATCH C++] - SD-6 Implementation Part 3 - .
On 01/09/14 21:46 -0400, Ed Smith-Rowland wrote: Index: include/bits/stl_function.h === --- include/bits/stl_function.h (revision 214680) +++ include/bits/stl_function.h (working copy) @@ -217,6 +217,10 @@ }; #if __cplusplus 201103L + +#define __cpp_lib_transparent_operators 201210 +#define __cpp_lib_generic_associative_lookup 201304 The generic associative lookup feature is not supported. Index: testsuite/experimental/feat-lib-fund.cc === --- testsuite/experimental/feat-lib-fund.cc (revision 0) +++ testsuite/experimental/feat-lib-fund.cc (working copy) @@ -0,0 +1,25 @@ +// { dg-options -std=gnu++14 } +// { dg-do compile } + +#include experimental/optional +#include experimental/string_view + +#if !__has_include(experimental/optional) +# error experimental/optional +#endif + +//#if !__has_include(experimental/net) +//# error experimental/net +//#endif + +//#if !__has_include(experimental/any) +//# error experimental/any +//#endif This can be uncommented, experimental/any is available. OK with those changes. Once committed, can you update https://gcc.gnu.org/gcc-5/changes.html to say GCC follows the SD-6 recommendations? Thanks.
Re: [PATCH] Add -fno-instrument-function
On Mon, Sep 1, 2014 at 10:25 PM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com [This was an old patch of mine that has been posted before, but never made it in] This adds a new C/C++ option to force __attribute__((no_instrument_function)) on every function compiled. This is useful together with LTO. You may want to have the whole program compiled with -pg and have to specify that in the LTO link, but want to disable it for some specific files. As the option works on the frontend level it is already passed through properly by LTO. Without LTO it is equivalent to not specifing -pg or -mfentry. This fixes some missing functionality in the Linux kernel LTO port, in particular it allows using the function tracer with LTO kernels. Longer term it would be nicer if all suitable options were handled like this for LTO by turning them into attributes, but that would be a much larger project. Passed bootstrap and test suite on x86_64-linux. Ok? Hmm, why not make -no-pg (does that exist?) and/or -mno-fentry do this? That is, I don't see the need for a new option. Or do it the other way around - change the default to DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT and make -pg/-mfentry unset that (or have DECL_INSTRUMENT_FUNCTION_ENTRY_EXIT). Richard. gcc/: 2014-09-01 Andi Kleen a...@linux.intel.com * c.opt (fno-instrument-function): Document. gcc/c: 2014-09-01 Andi Kleen a...@linux.intel.com * c-decl.c (start_function): Handle force_no_instrument_function gcc/cp: 2014-09-01 Andi Kleen a...@linux.intel.com * decl.c (start_preparsed_function): Handle force_no_instrument_function gcc/testsuite: 2014-09-01 Andi Kleen a...@linux.intel.com * g++.dg/fno-instrument-function.C: Add. * gcc.dg/fno-instrument-function.c: Add. 
--- gcc/c-family/c.opt | 4 gcc/c/c-decl.c | 3 +++ gcc/cp/decl.c | 3 +++ gcc/doc/invoke.texi| 8 +++- gcc/testsuite/g++.dg/fno-instrument-function.C | 18 ++ gcc/testsuite/gcc.dg/fno-instrument-function.c | 24 6 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/fno-instrument-function.C create mode 100644 gcc/testsuite/gcc.dg/fno-instrument-function.c diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 210a099..2aabd23 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1118,6 +1118,10 @@ Enum(ivar_visibility) String(public) Value(IVAR_VISIBILITY_PUBLIC) EnumValue Enum(ivar_visibility) String(package) Value(IVAR_VISIBILITY_PACKAGE) +fno-instrument-function +C C++ ObjC ObjC++ RejectNegative Report Var(force_no_instrument_function) +Force __attribute__((no_instrument_function)) for all functions in translation unit. + fnonansi-builtins C++ ObjC++ Var(flag_no_nonansi_builtin, 0) diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index b4995a6..493240f 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -8044,6 +8044,9 @@ start_function (struct c_declspecs *declspecs, struct c_declarator *declarator, if (current_scope == file_scope) maybe_apply_pragma_weak (decl1); + if (force_no_instrument_function) +DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (decl1) = 1; + /* Warn for unlikely, improbable, or stupid declarations of `main'. */ if (warn_main MAIN_NAME_P (DECL_NAME (decl1))) { diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index d03f8a4..505ad50 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -13251,6 +13251,9 @@ start_preparsed_function (tree decl1, tree attrs, int flags) lookup_attribute (noinline, attrs)) warning (0, inline function %q+D given attribute noinline, decl1); + if (force_no_instrument_function) +DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (decl1) = 1; + /* Handle gnu_inline attribute. 
*/ if (GNU_INLINE_P (decl1)) { diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d15d4a9..51b8d20 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -169,7 +169,7 @@ in the following sections. -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol -fno-asm -fno-builtin -fno-builtin-@var{function} @gol -fhosted -ffreestanding -fopenmp -fopenmp-simd -fms-extensions @gol --fplan9-extensions -trigraphs -traditional -traditional-cpp @gol +-fplan9-extensions -trigraphs -traditional -traditional-cpp -fno-instrument-function @gol -fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol -fsigned-bitfields -fsigned-char @gol -funsigned-bitfields -funsigned-char} @@ -1971,6 +1971,12 @@ Allow implicit conversions between vectors with differing numbers of elements and/or incompatible element types. This option should not be used for new code. +@item -fno-instrument-function
Re: [PATCH] Avoid inserting dead code in PRE, do less work
On Mon, 1 Sep 2014, Richard Biener wrote: The following patch tries to work towards fixing PR62291 by moving NEW_SETS/AVAIL_OUT adding strictly to insert_into_preds_of_block and the value / expression we wanted to insert. If doing that for other unrelated expressions this may cause fake partial redundancies to be detected and dead code will be inserted such as for gcc.dg/tree-ssa/ssa-pre-28.c which is now fixed. The idea is that we could now simulate insertion and its recursion without actually performing the insertions (which requires AVAIL_OUT) and instead postpone that to elimination time. Well. Idea... Bootstrap and regtest running on x86_64-unknown-linux-gnu. So this doesn't work (it wrecks gcc.c-torture/compile/pr43415.c which endlessly inserts via find_or_generate_expression). Which get's me back to the point that find_or_generate_expression isn't a good implementation to fix PR37997 (gcc.dg/tree-ssa/ssa-pre-28.c). Anyway, I'll put this patch on hold (though I certainly would like to remove that PR37997-fixing code ...). Richard. Richard. 2014-09-01 Richard Biener rguent...@suse.de * tree-ssa-pre.c (find_or_generate_expression): Expand comment. (create_expression_by_pieces): Do not add to NEW_SETS or AVAIL_OUT here. (insert_into_preds_of_block): Instead do it here and only for the partial redundant value we inserted. Index: gcc/tree-ssa-pre.c === --- gcc/tree-ssa-pre.c(revision 214795) +++ gcc/tree-ssa-pre.c(working copy) @@ -2797,9 +2797,11 @@ find_or_generate_expression (basic_block return NULL_TREE; } - /* It must be a complex expression, so generate it recursively. Note - that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c - where the insert algorithm fails to insert a required expression. */ + /* It must be a complex expression, so generate it recursively. 
+ Note that this is only necessary to handle cases like + gcc.dg/tree-ssa/ssa-pre-28.c where the insert algorithm fails to + insert a required expression because the dependent expression + isn't partially redundant. */ bitmap exprset = value_expressions[lookfor]; bitmap_iterator bi; unsigned int i; @@ -2846,7 +2848,6 @@ create_expression_by_pieces (basic_block unsigned int value_id; gimple_stmt_iterator gsi; tree exprtype = type ? type : get_expr_type (expr); - pre_expr nameexpr; gimple newstmt; switch (expr-kind) @@ -2941,17 +2942,12 @@ create_expression_by_pieces (basic_block { gimple stmt = gsi_stmt (gsi); tree forcedname = gimple_get_lhs (stmt); - pre_expr nameexpr; if (TREE_CODE (forcedname) == SSA_NAME) { bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname)); VN_INFO_GET (forcedname)-valnum = forcedname; VN_INFO (forcedname)-value_id = get_next_value_id (); - nameexpr = get_or_alloc_expr_for_name (forcedname); - add_to_value (VN_INFO (forcedname)-value_id, nameexpr); - bitmap_value_replace_in_set (NEW_SETS (block), nameexpr); - bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr); } } gimple_seq_add_seq (stmts, forced_stmts); @@ -2979,12 +2975,6 @@ create_expression_by_pieces (basic_block VN_INFO (name)-valnum = sccvn_valnum_from_value_id (value_id); if (VN_INFO (name)-valnum == NULL_TREE) VN_INFO (name)-valnum = name; - gcc_assert (VN_INFO (name)-valnum != NULL_TREE); - nameexpr = get_or_alloc_expr_for_name (name); - add_to_value (value_id, nameexpr); - if (NEW_SETS (block)) -bitmap_value_replace_in_set (NEW_SETS (block), nameexpr); - bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr); pre_stats.insertions++; if (dump_file (dump_flags TDF_DETAILS)) @@ -3061,7 +3051,11 @@ insert_into_preds_of_block (basic_block nophi = true; continue; } - avail[pred-dest_idx] = get_or_alloc_expr_for_name (builtexpr); + pre_expr nameexpr = get_or_alloc_expr_for_name (builtexpr); + avail[pred-dest_idx] = nameexpr; + add_to_value (get_expr_value_id (eprime), 
nameexpr); + bitmap_value_replace_in_set (NEW_SETS (bprime), nameexpr); + bitmap_value_replace_in_set (AVAIL_OUT (bprime), nameexpr); insertions = true; } else if (eprime-kind == CONSTANT) Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c === --- gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c(revision 214795) +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-28.c(working copy) @@ -15,7 +15,13 @@ int foo (int i, int b, int result) } /* We should insert i + 1 into the if (b) path as well as the simplified - i + 1 -2 expression. And
[C++ Patch] DR 1453
Hi, while looking into c++/58102 and DR 1405 I noticed that we don't implement DR 1453 either, sort of dual issue with volatile instead of mutable. Tested x86_64-linux. Thanks, Paolo. /cp 2014-09-02 Paolo Carlini paolo.carl...@oracle.com DR 1453 * class.c (check_field_decls): A class of literal type cannot have volatile non-static data members and base classes. (explain_non_literal_class): Update. /testsuite 2014-09-02 Paolo Carlini paolo.carl...@oracle.com DR 1453 * g++.dg/cpp0x/constexpr-volatile.C: New. * g++.dg/ext/is_literal_type2.C: Likewise. Index: cp/class.c === --- cp/class.c (revision 214808) +++ cp/class.c (working copy) @@ -3528,9 +3528,11 @@ check_field_decls (tree t, tree *access_decls, CLASSTYPE_NON_AGGREGATE (t) = 1; /* If at least one non-static data member is non-literal, the whole - class becomes non-literal. Note: if the type is incomplete we -will complain later on. */ - if (COMPLETE_TYPE_P (type) !literal_type_p (type)) + class becomes non-literal. Per Core/1453, volatile non-static +data members and base classes are also not allowed. +Note: if the type is incomplete we will complain later on. 
*/ + if (COMPLETE_TYPE_P (type) + (!literal_type_p (type) || CP_TYPE_VOLATILE_P (type))) CLASSTYPE_LITERAL_P (t) = false; /* A standard-layout class is a class that: @@ -5431,6 +5433,9 @@ explain_non_literal_class (tree t) if (CLASS_TYPE_P (ftype)) explain_non_literal_class (ftype); } + if (CP_TYPE_VOLATILE_P (ftype)) + inform (0, non-static data member %q+D has + volatile type, field); } } } Index: testsuite/g++.dg/cpp0x/constexpr-volatile.C === --- testsuite/g++.dg/cpp0x/constexpr-volatile.C (revision 0) +++ testsuite/g++.dg/cpp0x/constexpr-volatile.C (working copy) @@ -0,0 +1,26 @@ +// DR 1453 +// { dg-do compile { target c++11 } } + +struct S { + constexpr S() : n{} { } + volatile int n; +}; + +constexpr S s; // { dg-error literal } + +struct Z { + volatile int m; +}; + +struct T { + constexpr T() : n{} { } + Z n; +}; + +constexpr T t; // { dg-error literal } + +struct U : Z { + constexpr U() : Z{} { } +}; + +constexpr U u; // { dg-error literal } Index: testsuite/g++.dg/ext/is_literal_type2.C === --- testsuite/g++.dg/ext/is_literal_type2.C (revision 0) +++ testsuite/g++.dg/ext/is_literal_type2.C (working copy) @@ -0,0 +1,26 @@ +// DR 1453 +// { dg-do compile { target c++11 } } + +struct S { + constexpr S() : n{} { } + volatile int n; +}; + +static_assert(!__is_literal_type(S), ); + +struct Z { + volatile int m; +}; + +struct T { + constexpr T() : n{} { } + Z n; +}; + +static_assert(!__is_literal_type(T), ); + +struct U : Z { + constexpr U() : Z{} { } +}; + +static_assert(!__is_literal_type(U), );
Re: [patch] No allocation for empty unordered containers
On 30/08/14 20:03 +0200, François Dumont wrote: Any news for my patch proposals ? Regarding documentation of default minimum number of buckets, I don't know where it has been documented but why do we need to document it separately ? Could it be taken care of by Doxygen ? Can't it get the default value from the code itself ? If not we could document it ourselves next to the code rather than in a distinct file. It's OK to document it with a Doxygen comment, although I think it would be better in doc/xml/manual/containers.xml. I'm reviewing the rest of the patch today, thanks for your patience.
[RFA:] testsuite: robustify g++.old-deja/g++.eh/badalloc1.C for 64-bit systems
In a native x86_64-linux toolchain in which eh-table-registration is done explicitly (i.e. dl_iterate_phdr and PT_GNU_EH_FRAME is *not* assumed, as that eliminates the issue), the memory overhead for exception-initialization goes beyond the 32768 bytes assumed in badalloc1.C and the test fails for reasons not intended by the test. You may think that's uninteresting, but presumably there are other 64-bit-systems, perhaps even GNU-based, that act similarly. For EH tables registered with the __register_frame_info scheme (let's call it eh-registry as opposed to eh-phdr), the incoming tables are not assumed to be sorted. EH initialization then does an initial sorting at the first exception, in which there are calls to malloc for arrays for the sorted tables. This is noticeable in badalloc1.C as it overrides malloc. All this happens at that first try{fn_throw();} with the related comment, i.e. before the fail = 1 and the actual test in badalloc1.C. (There are other calls to malloc for other unrelated initialization tasks, but for glibc systems these resolve to a malloc in the dynamic linker.) The sequence of calls to malloc in badalloc1.C go like this: Size Purpose Function name 132Core exception data. __cxxabiv1::__cxa_allocate_exception 88 EH table for badalloc1 start_fde_sort program, 9 FDE:s for the (ditto) linear table. 88 Ditto the erratic table. 19176 Similar 2395 FDE:s for the libstdc++ library, linear. 19176 Ditto the erratic table. *boom* The boom is simply the arena size check failing in the badalloc1.C malloc: // Verify that we didn't run out of memory before getting initialized. if (pos > arena_size) abort (); It seems the arena_size=32768 bytes estimate was from the 32-bit-systems era (svn logs indicate 2000). Just scaling it accordingly works fine, and we get to see the rest of the allocations: 1344 166 FDE:s in libgcc_s, linear table 1344 Ditto erratic. For a -m32 run, the corresponding allocation-size series is 100, 66, 66, 9592, 9592, 648, 648. 
(*) for one reason or another. Maybe the GNU linker is not used or *really* outdated (before 2001-12-13, 2.12) or glibc is *really* outdated (before 2001-07-25, 2.2.4). Or inhibit_libc accidentally set, or a compatibility scheme forcing eh-registry. More about that in a later post. Having investigated the related test-suite failure, I suggest to eliminate it by robustifying the arena size a bit (or 32 :). I don't touch the other arena_size definitions because (1) those numbers are presumably already fine for the related systems, those still alive, and (2) I don't like changing stuff I cannot test. Other observations: I guess there are similar copyright notices in the test-suite may need some general attention. The xfail list may benefit from tweaking; at least replacing the xstormy16 with int32plus or similar to cover the array size overflowing 16-bit addresses. Ok to commit? (Note the changelog-conditional-prefix continued-line format.) gcc/testsuite: * g++.old-deja/g++.eh/badalloc1.C [!STACK_SIZE !__FreeBSD__] [!__sun__ !__hpux__] (arena_size): Scale according to target pointer size. Index: g++.old-deja/g++.eh/badalloc1.C === --- g++.old-deja/g++.eh/badalloc1.C (revision 214810) +++ g++.old-deja/g++.eh/badalloc1.C (working copy) @@ -3,7 +3,7 @@ // itself call malloc(), and will fail if there is no more // memory available. // { dg-do run { xfail { { xstormy16-*-* *-*-darwin[3-7]* } || vxworks_rtp } } } -// Copyright (C) 2000, 2002, 2003, 2010, 2012 Free Software Foundation, Inc. +// Copyright (C) 2000, 2002, 2003, 2010, 2012, 2014 Free Software Foundation, Inc. // Contributed by Nathan Sidwell 6 June 2000 nat...@codesourcery.com // Check we can throw a bad_alloc exception when malloc dies. @@ -23,7 +23,10 @@ const int arena_size = 256; // FreeBSD 5 now requires over 131072 bytes. 
const int arena_size = 262144; #else -const int arena_size = 32768; +// Because pointers make up the bulk of our exception-initialization +// allocations, we scale by the pointer size from the original +// 32-bit-systems-based estimate. +const int arena_size = 32768 * ((sizeof (void *) + 3)/4); #endif #endif brgds, H-P
[PATCH][PR debug/60655] Power/GCC: Reject cross-section symbol subtraction
Hi, Similarly to ARM, where this issue was seen originally, and likely many other targets, the Power ABI does not appear to have a relocation defined to support taking a difference of two symbols in different sections each. This is seen as a failure in gcc.c-torture/compile/pr60655-2.c: Executing on host: powerpc-linux-gnu-gcc -fno-diagnostics-show-caret -fdiagnostics-color=never -O3 -g -w -c -o pr60655-2.o .../gcc/testsuite/gcc.c-torture/compile/pr60655-2.c(timeout = 300) /tmp/ccAfNLMj.s: Assembler messages: /tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' {.bss section} /tmp/ccAfNLMj.s:932: Error: expression too complex compiler exited with status 1 output is: /tmp/ccAfNLMj.s: Assembler messages: /tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' {.bss section} /tmp/ccAfNLMj.s:932: Error: expression too complex FAIL: gcc.c-torture/compile/pr60655-2.c -O3 -g (test for excess errors) Excess errors: /tmp/ccAfNLMj.s:932: Error: can't resolve `L0^A' {*ABS* section} - `.LANCHOR0' {.bss section} /tmp/ccAfNLMj.s:932: Error: expression too complex Here's a port of the original ARM fix (commit 209269), that removes the failure for me. Regression-tested with the following powerpc-gnu-linux multilibs: -mcpu=603e -mcpu=603e -msoft-float -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mcpu=7400 -maltivec -mabi=altivec -mcpu=e6500 -maltivec -mabi=altivec -mcpu=e5500 -m64 -mcpu=e6500 -m64 -maltivec -mabi=altivec OK for trunk and 4.9? 2014-09-02 Maciej W. Rozycki ma...@codesourcery.com PR debug/60655 * config/rs6000/rs6000.c (rs6000_const_not_ok_for_debug_p): Reject MINUS with SYM_REFs in different sections. 
Maciej gcc-rs6000-minus-not-ok-for-debug.diff Index: gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.c === --- gcc-fsf-trunk-quilt.orig/gcc/config/rs6000/rs6000.c 2014-08-26 20:30:10.348973028 +0100 +++ gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.c 2014-09-01 17:09:23.748927487 +0100 @@ -6974,7 +6974,13 @@ rs6000_delegitimize_address (rtx orig_x) /* Return true if X shouldn't be emitted into the debug info. The linker doesn't like .toc section references from - .debug_* sections, so reject .toc section symbols. */ + .debug_* sections, so reject .toc section symbols. + + Also as a temporary fix for PR60655 we reject certain MINUS + expressions. Ideally we need to handle most of these cases in + the generic part but currently we reject minus (..) (sym_ref). + We try to ameliorate the case with minus (sym_ref1) (sym_ref2) + where they are in the same section. */ static bool rs6000_const_not_ok_for_debug_p (rtx x) @@ -6988,6 +6994,35 @@ rs6000_const_not_ok_for_debug_p (rtx x) return true; } + if (GET_CODE (x) == MINUS) +{ + tree decl_op0 = NULL; + tree decl_op1 = NULL; + + if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + { +decl_op1 = SYMBOL_REF_DECL (XEXP (x, 1)); +if (decl_op1 + GET_CODE (XEXP (x, 0)) == SYMBOL_REF + (decl_op0 = SYMBOL_REF_DECL (XEXP (x, 0 + { +if ((TREE_CODE (decl_op1) == VAR_DECL + || TREE_CODE (decl_op1) == CONST_DECL) + (TREE_CODE (decl_op0) == VAR_DECL +|| TREE_CODE (decl_op0) == CONST_DECL)) + return (get_variable_section (decl_op1, false) + != get_variable_section (decl_op0, false)); + +if (TREE_CODE (decl_op1) == LABEL_DECL + TREE_CODE (decl_op0) == LABEL_DECL) + return (DECL_CONTEXT (decl_op1) + != DECL_CONTEXT (decl_op0)); + } + +return true; + } +} + return false; }
Re: Fix libgomp crash without TLS (PR42616)
May I use gomp_free_thread as a destructor for pthread_key_create? Then I'll make initial_thread_tls_data global for the first case, but how can I differentiate thread created by gomp_thread_start (second case)? 2014-09-01 14:51 GMT+04:00 Jakub Jelinek ja...@redhat.com: On Fri, Aug 29, 2014 at 10:40:57AM -0700, Richard Henderson wrote: On 08/06/2014 03:05 AM, Varvara Rainchik wrote: * libgomp.h (gomp_thread): For non TLS case create thread data. * team.c (create_non_tls_thread_data): New function. --- diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index a1482cc..cf3ec8f 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -479,9 +479,15 @@ static inline struct gomp_thread *gomp_thread (void) } #else extern pthread_key_t gomp_tls_key; +extern struct gomp_thread *create_non_tls_thread_data (void); static inline struct gomp_thread *gomp_thread (void) { - return pthread_getspecific (gomp_tls_key); + struct gomp_thread *thr = pthread_getspecific (gomp_tls_key); + if (thr == NULL) + { +thr = create_non_tls_thread_data (); + } + return thr; } This should never happen. I guess it can happen if you mix up explicit pthread_create and libgomp APIs. initialize_team will only initialize it in the initial thread, while if you use #pragma omp ... or omp_* calls from a thread created with pthread_create, in the !HAVE_TLS case pthread_getspecific will return NULL. Now, the patch doesn't handle that case completely though (and is badly formatted), the problem is that if we allocate in the !HAVE_TLS case in non-initial thread the TLS data, we want to free them again, so that would mean pthread_key_create with non-NULL destructor, and then we need to differentiate in between the 3 cases - key equal to initial_thread_tls_data (would need to move out of the block context), no freeing needed, thread created by gomp_thread_start, no freeing needed, otherwise free. The thread-specific data is set in gomp_thread_start and initialize_team. 
Where are you getting a call to gomp_thread that hasn't been through one of those functions? Jakub
Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass
On Tue, Sep 02, 2014 at 10:02:38AM +0800, Bin.Cheng wrote: Archaeology suggests this check is because the clobber might be an earlyclobber. Which seems silly: how can it be a valid insn at all in that case? It seems to me the check can just be removed. That will hide your issue, maybe even solve it (but I doubt it). Silly for other reasons, namely that earlyclobber doesn't come into play until after combine (register allocation and later). The last change to this code was by Ulrich (cc:ed); in that thread (June 2004, mostly not threaded in the mail archive, broken MUAs :-( ) it was said that any clobber should be considered an earlyclobber (an RTL insn can expand to multiple machine instructions, for example). But I don't see how that can matter for dest here (the dest of insn, that's 76 in the example), only for src. The version of flags set in 76 obviously dies in 77 (it clobbers the reg after all), but there is no way it could clobber it before it uses it, that just makes no sense. And in the combined insn that version of flags does not exist at all. Agreed, otherwise it would be another uninitialized use problem. Maybe the check is too strict here? Do you have some archived page address for that, just saving us some time for digging. http://gcc.gnu.org/ml/gcc-patches/2004-06/msg00994.html (and look in that month's archives for the rest of the messages). My only concern is, logic in distribute_notes should also be revisited under this BZ. I think the issue will be hidden by changes we are talking about in can_combine_p. Yes. Unless we disallow all combinations that *would* cause problems :-) Segher
Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass
On Mon, Sep 01, 2014 at 09:28:09PM -0600, Jeff Law wrote: Note that in this case we're talking about a hard register, not a pseudo. I was referring to r84 in Bin's message, not the condition code register. Unless I missed something it's set at the start of the sequence to the value 0, then later to -ltu(flags,cc,0). Bin said that the three-insn combination is refused because of the flags register, not r84. So either the four-insn combination should do those same checks, or we should allow it, or both. There's no good reason I can see why we're reusing a pseudo like that. I suspect that if we go back, fix whatever's creating that lame sequence and simply reject combinations involving a pseudo set more than once it won't affect code in any real way. If we wanted to be anal about it, we'd put in some kind of debugging note and someone could do some wider scale testing. All that, too :-) Although it all seems to work fine for two-insn and three-insn combinations. Segher
Re: [PATCH] support ggc hash_map and hash_set
On Tue, Sep 02, 2014 at 10:36:27AM +0200, Richard Biener wrote: On Tue, Sep 2, 2014 at 3:56 AM, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, There are still some issues to make this work really nicely, but this part is probably good enough it's worth reviewing. For one thing you can't use ggc hash_map or set in front ends with some types or gengtype will decide to put the overloads of the marking routines it provides in a front end file instead of the one it chose before breaking other front ends. However that seems to be an unrelated issue you can trigger it without using hash_map/set, so we might as well solve it separately. I had to have the entry marking functions for set delegate to the traits class because gcc 4.9.1 issues clearly bogus errors if you inline the code from the traits implementation. We may well want to make map work the same way at some point to enable some of the special GTY attributes like if_marked, but it doesn't seem to be necessary right now. bootstrapped + regtested without regressions on x86_64-unknown-linux-gnu, ok? Ok if you make the gcc_assert()s in the marking routines gcc_checking_assert()s. sure Btw - do manual markers need any special support for finalizers? I don't think so since marking and finalizers are basically independent. Does the hash table need any special support to make finalizers efficient (avoid recording for each entry if stored in-place?) Well, it actually just stores one for the vector of elements. However I think right now destructor may be called twice once by the Traits::remove call in ~hash_table (), and once as a finalizer. It probably actually makes sense to stop registering finalizers for the entries vector and just use the call to Traits::remove, which would be consistent with non gc hash maps. Trev Thanks, Richard. Trev gcc/ChangeLog: 2014-09-01 Trevor Saunders tsaund...@mozilla.com * alloc-pool.c: Include coretypes.h. 
* cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c, function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and hash_set instead of htab. * ggc-page.c (in_gc): New variable. (ggc_free): Do nothing if a collection is taking place. (ggc_collect): Set in_gc appropriately. * ggc.h (gt_ggc_mx(const char *)): New function. (gt_pch_nx(const char *)): Likewise. (gt_ggc_mx(int)): Likewise. (gt_pch_nx(int)): Likewise. * hash-map.h (hash_map::hash_entry::ggc_mx): Likewise. (hash_map::hash_entry::pch_nx): Likewise. (hash_map::hash_entry::pch_nx_helper): Likewise. (hash_map::hash_map): Adjust. (hash_map::create_ggc): New function. (gt_ggc_mx): Likewise. (gt_pch_nx): Likewise. * hash-set.h (default_hashset_traits::ggc_mx): Likewise. (default_hashset_traits::pch_nx): Likewise. (hash_set::hash_entry::ggc_mx): Likewise. (hash_set::hash_entry::pch_nx): Likewise. (hash_set::hash_entry::pch_nx_helper): Likewise. (hash_set::hash_set): Adjust. (hash_set::create_ggc): New function. (hash_set::elements): Likewise. (gt_ggc_mx): Likewise. (gt_pch_nx): Likewise. * hash-table.h (hash_table::hash_table): Adjust. (hash_table::m_ggc): New member. (hash_table::~hash_table): Adjust. (hash_table::expand): Likewise. (hash_table::empty): Likewise. (gt_ggc_mx): New function. (hashtab_entry_note_pointers): Likewise. (gt_pch_nx): Likewise. diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c index 0d31835..bfaa0e4 100644 --- a/gcc/alloc-pool.c +++ b/gcc/alloc-pool.c @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see #include config.h #include system.h +#include coretypes.h #include alloc-pool.h #include hash-table.h #include hash-map.h diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 879899c..030a1c7 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1604,7 +1604,6 @@ struct cgraph_2node_hook_list; /* Map from a symbol to initialization/finalization priorities. 
*/ struct GTY(()) symbol_priority_map { - symtab_node *symbol; priority_type init; priority_type fini; }; @@ -1872,7 +1871,7 @@ public: htab_t GTY((param_is (symtab_node))) assembler_name_hash; /* Hash table used to hold init priorities. */ - htab_t GTY ((param_is (symbol_priority_map))) init_priority_hash; + hash_mapsymtab_node *, symbol_priority_map *init_priority_hash; FILE* GTY ((skip)) dump_file; diff --git a/gcc/dbxout.c b/gcc/dbxout.c index 946f1d1..d856bdd 100644 --- a/gcc/dbxout.c +++ b/gcc/dbxout.c @@ -2484,12 +2484,9 @@ dbxout_expand_expr (tree expr) /* Helper function for output_used_types. Queue one entry from the used types hash to be output. */ -static int -output_used_types_helper (void
Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass
Segher Boessenkool wrote: On Mon, Sep 01, 2014 at 10:39:10AM -0600, Jeff Law wrote: On 09/01/14 05:38, Segher Boessenkool wrote: On Mon, Sep 01, 2014 at 11:36:07AM +0800, Bin.Cheng wrote: In the testcase (and comment in the proposed patch), why is combine combining four insns at all? That means it rejected combining just the first three. Why did it do that? It is explicitly rejected by the code below in can_combine_p. if (GET_CODE (PATTERN (i3)) == PARALLEL) for (i = XVECLEN (PATTERN (i3), 0) - 1; i >= 0; i--) if (GET_CODE (XVECEXP (PATTERN (i3), 0, i)) == CLOBBER) { /* Don't substitute for a register intended as a clobberable operand. */ rtx reg = XEXP (XVECEXP (PATTERN (i3), 0, i), 0); if (rtx_equal_p (reg, dest)) return 0; Since insn i2 in the list of i0/i1/i2 as below contains parallel clobber of dest_of_insn76/use_of_insn77. 32: r84:SI=0 76: flags:CC=cmp(r84:SI,0x1) REG_DEAD r84:SI 77: {r84:SI=-ltu(flags:CC,0);clobber flags:CC;} REG_DEAD flags:CC REG_UNUSED flags:CC Archaeology suggests this check is because the clobber might be an earlyclobber. Which seems silly: how can it be a valid insn at all in that case? It seems to me the check can just be removed. That will hide your issue, maybe even solve it (but I doubt it). Silly for other reasons, namely that earlyclobber doesn't come into play until after combine (register allocation and later). The last change to this code was by Ulrich (cc:ed); in that thread (June 2004, mostly not threaded in the mail archive, broken MUAs :-( ) it was said that any clobber should be considered an earlyclobber (an RTL insn can expand to multiple machine instructions, for example). But I don't see how that can matter for dest here (the dest of insn, that's 76 in the example), only for src. The version of flags set in 76 obviously dies in 77 (it clobbers the reg after all), but there is no way it could clobber it before it uses it, that just makes no sense. And in the combined insn that version of flags does not exist at all. 
This seems the time period where the email archive is not fully complete; some of the mails of that 2004 thread apparently were not linked into the monthly thread list. This archive seems to have them all: http://marc.info/?t=108747834900012r=1w=2 In any case, this test in can_combine_p rejects a combination for *two* different issues. One is the earlyclobber problem, which is what that 2004 thread was about, and which my patch back then relaxed for fixed hard register. However, this doesn't seem to apply to the example above; that is really about the second problem: don't substitute into a clobber. I understand the reason why this particular substitution is rejected is simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1) into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is invalid RTL. Now I guess this check could be relaxed if somewhere else in combine we'd recognize the substitution into a clobber and simply omit it in that case. Bye, Ulrich -- Dr. Ulrich Weigand GNU/Linux compilers and toolchain ulrich.weig...@de.ibm.com
[PATCH][match-and-simplify] Fix single RHS code-gen
Apparently we didn't exercise this before and thus it has gone unnoticed that we don't properly special case single-RHSs on GIMPLE. Fixed as follows. Bootstrapped on x86_64-unknown-linux-gnu, applied. Richard. 2014-09-02 Richard Biener rguent...@suse.de * gimple-match-head.c (maybe_build_generic_op): New function. (maybe_push_res_to_seq): Use it. * gimple-match.h (maybe_build_generic_op): Declare. * gimple-fold.c (fold_stmt_1): Use maybe_build_generic_op. Index: gcc/gimple-match-head.c === --- gcc/gimple-match-head.c (revision 214795) +++ gcc/gimple-match-head.c (working copy) @@ -267,6 +267,27 @@ gimple_resimplify3 (gimple_seq *seq, } +/* If in GIMPLE expressions with CODE go as single-rhs build + a GENERIC tree for that expression into *OP0. */ + +void +maybe_build_generic_op (enum tree_code code, tree type, + tree *op0, tree op1, tree op2) +{ + switch (code) +{ +case REALPART_EXPR: +case IMAGPART_EXPR: +case VIEW_CONVERT_EXPR: + *op0 = build1 (code, type, *op0); + break; +case BIT_FIELD_REF: + *op0 = build3 (code, type, *op0, op1, op2); + break; +default:; +} +} + /* Push the exploded expression described by RCODE, TYPE and OPS as a statement to SEQ if necessary and return a gimple value denoting the value of the expression. If RES is not NULL @@ -286,8 +307,6 @@ maybe_push_res_to_seq (code_helper rcode return ops[0]; if (!seq) return NULL_TREE; - if (!res) - res = make_ssa_name (type, NULL); /* Play safe and do not allow abnormals to be mentioned in newly created statements. 
*/ if ((TREE_CODE (ops[0]) == SSA_NAME @@ -299,6 +318,9 @@ maybe_push_res_to_seq (code_helper rcode TREE_CODE (ops[2]) == SSA_NAME SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2]))) return NULL_TREE; + if (!res) + res = make_ssa_name (type, NULL); + maybe_build_generic_op (rcode, type, ops[0], ops[1], ops[2]); gimple new_stmt = gimple_build_assign_with_ops (rcode, res, ops[0], ops[1], ops[2]); gimple_seq_add_stmt_without_update (seq, new_stmt); @@ -311,8 +333,6 @@ maybe_push_res_to_seq (code_helper rcode tree decl = builtin_decl_implicit (rcode); if (!decl) return NULL_TREE; - if (!res) - res = make_ssa_name (type, NULL); unsigned nargs = type_num_arguments (TREE_TYPE (decl)); gcc_assert (nargs = 3); /* Play safe and do not allow abnormals to be mentioned in @@ -326,6 +346,8 @@ maybe_push_res_to_seq (code_helper rcode TREE_CODE (ops[2]) == SSA_NAME SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2]))) return NULL_TREE; + if (!res) + res = make_ssa_name (type, NULL); gimple new_stmt = gimple_build_call (decl, nargs, ops[0], ops[1], ops[2]); gimple_call_set_lhs (new_stmt, res); gimple_seq_add_stmt_without_update (seq, new_stmt); Index: gcc/gimple-match.h === --- gcc/gimple-match.h (revision 214795) +++ gcc/gimple-match.h (working copy) @@ -44,6 +44,7 @@ bool gimple_simplify (gimple, code_helpe tree (*)(tree)); tree maybe_push_res_to_seq (code_helper, tree, tree *, gimple_seq *, tree res = NULL_TREE); +void maybe_build_generic_op (enum tree_code, tree, tree *, tree, tree); #endif /* GCC_GIMPLE_MATCH_H */ Index: gcc/gimple-fold.c === --- gcc/gimple-fold.c (revision 214795) +++ gcc/gimple-fold.c (working copy) @@ -2904,6 +2904,9 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, TREE_CODE (ops[2]) == SSA_NAME SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ops[2] { + maybe_build_generic_op (rcode, + TREE_TYPE (gimple_assign_lhs (stmt)), + ops[0], ops[1], ops[2]); gimple_assign_set_rhs_with_ops_1 (gsi, rcode, ops[0], ops[1], ops[2]); if (dump_file (dump_flags TDF_DETAILS))
Add missing Broadwell intrinsics.
Hi, Along with intrinsics for adcx/adox (supported since 4.8) ICC also added intrinsics for adc/sbb [1]. This patch adds them. Bootstraps/passes make-check. Ok for trunk? [1] http://www.xlsoft.com/jp/products/intel/compilers/ccm/2013/Release_Notes_u3.pdf ChangeLog below: gcc/ 2014-09-02 Ilya Tocar ilya.to...@intel.com * config/i386/adxintrin.h (_subborrow_u32): New. (_addcarry_u32): Ditto. (_subborrow_u64): Ditto. (_addcarry_u64): Ditto. * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_SBB32, IX86_BUILTIN_SBB64. (ix86_init_mmx_sse_builtins): Handle __builtin_ia32_sbb_u32, __builtin_ia32_sbb_u64 testsuite/ 2014-09-02 Ilya Tocar ilya.to...@intel.com * gcc.target/i386/adx-addcarryx32-1.c: Test addcarry, subborrow. * gcc.target/i386/adx-addcarryx32-2.c: Ditto. * gcc.target/i386/adx-addcarryx32-3.c: Ditto. * gcc.target/i386/adx-addcarryx64-1.c: Ditto. * gcc.target/i386/adx-addcarryx64-2.c: Ditto. * gcc.target/i386/adx-addcarryx64-3.c: Ditto. --- gcc/config/i386/adxintrin.h | 32 +++ gcc/config/i386/i386.c| 22 gcc/testsuite/gcc.target/i386/adx-addcarryx32-1.c | 5 +++- gcc/testsuite/gcc.target/i386/adx-addcarryx32-2.c | 27 +++ gcc/testsuite/gcc.target/i386/adx-addcarryx32-3.c | 5 +++- gcc/testsuite/gcc.target/i386/adx-addcarryx64-1.c | 5 +++- gcc/testsuite/gcc.target/i386/adx-addcarryx64-2.c | 27 +++ gcc/testsuite/gcc.target/i386/adx-addcarryx64-3.c | 5 +++- 8 files changed, 124 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h index 6118900..8f2c01a 100644 --- a/gcc/config/i386/adxintrin.h +++ b/gcc/config/i386/adxintrin.h @@ -30,6 +30,22 @@ extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_subborrow_u32 (unsigned char __CF, unsigned int __X, + unsigned int __Y, unsigned int *__P) +{ +return __builtin_ia32_sbb_u32 (__CF, __Y, __X, __P); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_addcarry_u32 (unsigned char 
__CF, unsigned int __X, + unsigned int __Y, unsigned int *__P) +{ +return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _addcarryx_u32 (unsigned char __CF, unsigned int __X, unsigned int __Y, unsigned int *__P) { @@ -39,6 +55,22 @@ _addcarryx_u32 (unsigned char __CF, unsigned int __X, #ifdef __x86_64__ extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_subborrow_u64 (unsigned char __CF, unsigned long __X, + unsigned long __Y, unsigned long long *__P) +{ +return __builtin_ia32_sbb_u64 (__CF, __Y, __X, __P); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_addcarry_u64 (unsigned char __CF, unsigned long __X, + unsigned long __Y, unsigned long long *__P) +{ +return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _addcarryx_u64 (unsigned char __CF, unsigned long __X, unsigned long __Y, unsigned long long *__P) { diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3e4c93e..91b5d06 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28778,6 +28778,10 @@ enum ix86_builtins IX86_BUILTIN_ADDCARRYX32, IX86_BUILTIN_ADDCARRYX64, + /* ADC/SBB instructions. */ + IX86_BUILTIN_SBB32, + IX86_BUILTIN_SBB64, + /* FSGSBASE instructions. */ IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64, @@ -31213,6 +31217,14 @@ ix86_init_mmx_sse_builtins (void) UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, IX86_BUILTIN_ADDCARRYX64); + /* ADX/SBB */ + def_builtin (0, __builtin_ia32_sbb_u32, + UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); + def_builtin (OPTION_MASK_ISA_64BIT, + __builtin_ia32_sbb_u64, + UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, + IX86_BUILTIN_SBB64); + /* Read/write FLAGS. 
*/ def_builtin (~OPTION_MASK_ISA_64BIT, __builtin_ia32_readeflags_u32, UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); @@ -35617,6 +35629,16 @@ rdseed_step: emit_insn (gen_zero_extendqisi2 (target, op2)); return target; +case IX86_BUILTIN_SBB32: + icode = CODE_FOR_subsi3_carry; + mode0 = SImode; + goto addcarryx; + +case IX86_BUILTIN_SBB64: + icode = CODE_FOR_subdi3_carry; + mode0 = DImode; + goto addcarryx; + case IX86_BUILTIN_ADDCARRYX32: icode = TARGET_ADX ?
Re: [PATCH][0/7][ARM] Convert VFP mnemonics to UAL
Ping on this series? Thanks, Kyrill On 19/08/14 16:04, Kyrill Tkachov wrote: Hi all, This patch series converts the arm backend to output unified assembly syntax for the VFP instructions. This makes it more readable since most UAL mnemonics also include various type suffixes such as .f32 and .f64 that make it quick to identify the data types being operated on. Each patch is independent of the rest and can be applied in any order. Bootstrapped and tested on arm-none-linux-gnueabihf with gas from binutils 2.22 and newer. Compiled various floating point benchmarks to make sure the binaries are identical. Ok for trunk? Thanks, Kyrill gcc/config/arm/arm-protos.h |2 +- gcc/config/arm/arm.c | 12 - gcc/config/arm/arm.md|2 +- gcc/config/arm/vfp.md| 91 ++ gcc/testsuite/gcc.target/arm/vfp-1.c | 68 - 5 files changed, 93 insertions(+), 82 deletions(-)
[FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)
(Now for the real fix.) This patch fixes the last two spots where -Wlogical-not-parentheses warns. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3 if you want more info about the changes. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2014-09-02 Marek Polacek pola...@redhat.com PR fortran/62270 * interface.c (compare_parameter): Fix condition. * trans-expr.c (gfc_conv_procedure_call): Likewise. * gfortran.dg/pointer_intent_7.f90: Adjust dg-error. diff --git gcc/fortran/interface.c gcc/fortran/interface.c index b210d18..f6233b7 100644 --- gcc/fortran/interface.c +++ gcc/fortran/interface.c @@ -2014,7 +2014,7 @@ compare_parameter (gfc_symbol *formal, gfc_expr *actual, if (formal-ts.type == BT_CLASS formal-attr.class_ok actual-expr_type != EXPR_NULL ((CLASS_DATA (formal)-attr.class_pointer - !formal-attr.intent == INTENT_IN) + formal-attr.intent != INTENT_IN) || CLASS_DATA (formal)-attr.allocatable)) { if (actual-ts.type != BT_CLASS) diff --git gcc/fortran/trans-expr.c gcc/fortran/trans-expr.c index f2ed474..4c057ee 100644 --- gcc/fortran/trans-expr.c +++ gcc/fortran/trans-expr.c @@ -4589,7 +4589,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, e-expr_type == EXPR_VARIABLE (!e-ref || (e-ref-type == REF_ARRAY - !e-ref-u.ar.type != AR_FULL)) + e-ref-u.ar.type != AR_FULL)) e-symtree-n.sym-attr.optional) { tmp = fold_build3_loc (input_location, COND_EXPR, diff --git gcc/testsuite/gfortran.dg/pointer_intent_7.f90 gcc/testsuite/gfortran.dg/pointer_intent_7.f90 index c09eb2b..5387ace 100644 --- gcc/testsuite/gfortran.dg/pointer_intent_7.f90 +++ gcc/testsuite/gfortran.dg/pointer_intent_7.f90 @@ -23,7 +23,7 @@ contains call bar2 (c) call bar3 (c) call bar2p (b) ! { dg-error INTENT\\(IN\\) in pointer association context \\(actual argument to INTENT = OUT/INOUT } -call bar3p (b) ! { dg-error INTENT\\(IN\\) in pointer association context \\(actual argument to INTENT = OUT/INOUT } +call bar3p (b) ! { dg-error Actual argument to .n. 
at \\(1\\) must be polymorphic } call bar2p (c) ! { dg-error INTENT\\(IN\\) in pointer association context \\(actual argument to INTENT = OUT/INOUT } call bar3p (c) ! { dg-error INTENT\\(IN\\) in pointer association context \\(actual argument to INTENT = OUT/INOUT } end subroutine Marek
Re: Add missing Broadwell intrinsics.
On Tue, Sep 2, 2014 at 2:36 PM, Ilya Tocar tocarip.in...@gmail.com wrote: Hi, Along with intrinsics for adcx/adox (supported since 4.8) ICC also added intrinsics for adc/sbb [1]. This patch adds them. Bootstraps/passes make-check. Ok for trunk? [1] http://www.xlsoft.com/jp/products/intel/compilers/ccm/2013/Release_Notes_u3.pdf ChangeLog below: gcc/ 2014-09-02 Ilya Tocar ilya.to...@intel.com * config/i386/adxintrin.h (_subborrow_u32): New. (_addcarry_u32): Ditto. (_subborrow_u64): Ditto. (_addcarry_u64): Ditto. * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_SBB32, IX86_BUILTIN_SBB64. (ix86_init_mmx_sse_builtins): Handle __builtin_ia32_sbb_u32, __builtin_ia32_sbb_u64 testsuite/ 2014-09-02 Ilya Tocar ilya.to...@intel.com * gcc.target/i386/adx-addcarryx32-1.c: Test addcarry, subborrow. * gcc.target/i386/adx-addcarryx32-2.c: Ditto. * gcc.target/i386/adx-addcarryx32-3.c: Ditto. * gcc.target/i386/adx-addcarryx64-1.c: Ditto. * gcc.target/i386/adx-addcarryx64-2.c: Ditto. * gcc.target/i386/adx-addcarryx64-3.c: Ditto. OK with two comment changes below. Thanks, Uros. + /* ADC/SBB instructions. */ Just SBB instruction. + IX86_BUILTIN_SBB32, + IX86_BUILTIN_SBB64, + /* FSGSBASE instructions. */ IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64, @@ -31213,6 +31217,14 @@ ix86_init_mmx_sse_builtins (void) UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, IX86_BUILTIN_ADDCARRYX64); + /* ADX/SBB */ Also here, these builtins implement just SBB instruction. + def_builtin (0, __builtin_ia32_sbb_u32, + UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); + def_builtin (OPTION_MASK_ISA_64BIT, + __builtin_ia32_sbb_u64, + UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, + IX86_BUILTIN_SBB64); +
Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass
On Tue, Sep 02, 2014 at 02:10:32PM +0200, Ulrich Weigand wrote: In any case, this test in can_combine_p rejects a combination for *two* different issues. One is the earlyclobber problem, which is what that 2004 thread was about, and which my patch back then relaxed for fixed hard register. However, this doesn't seem to apply to the example above; that is really about the second problem: don't substitute into a clobber. Right. I understand the reason why this particular substitution is rejected is simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1) into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is invalid RTL. I checked, and that is indeed what combine does. How silly. Now I guess this check could be relaxed if somewhere else in combine we'd recognize the substitution into a clobber and simply omit it in that case. Yeah. In the testcase, combine tries combining 76,77 (77 is that clobbering insn) and refuses it; then it tries 32,76,77 and refuses it; and then it tries 32,76,77,43 and allows it (it doesn't do this check at all, 77 is not i3, combine omits the clobber completely). Which is inconsistent. What a mess. Thanks for looking! Segher
[C PATCH] Backport a fix for PR62294 to 4.9
PR62294 reports that 4.9 does not emit an incompatible pointer type warning in certain scenario. I unknowingly broke this in r207335, and then fixed it in r210980, which is a follow-up to the former. But 4.9 doesn't have the latter. This patch is basically a backport of r210980, only without the traditional conversion stuff. Bootstrapped/regtested on x86_64-linux, ok for 4.9? 2014-09-02 Marek Polacek pola...@redhat.com PR c/62294 * c-typeck.c (convert_arguments): Get location of a parameter. Change error and warning calls to error_at and warning_at. Pass location of a parameter to it. (convert_for_assignment): Add parameter to WARN_FOR_ASSIGNMENT and WARN_FOR_QUALIFIERS. Pass expr_loc to those. * gcc.dg/pr56724-1.c: New test. * gcc.dg/pr56724-2.c: New test. diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c index 5838d6a..d096ad4 100644 --- gcc/c/c-typeck.c +++ gcc/c/c-typeck.c @@ -3071,6 +3071,12 @@ convert_arguments (location_t loc, veclocation_t arg_loc, tree typelist, bool excess_precision = false; bool npc; tree parmval; + /* Some __atomic_* builtins have additional hidden argument at +position 0. */ + location_t ploc + = !arg_loc.is_empty () values-length () == arg_loc.length () + ? 
expansion_point_location_if_in_system_header (arg_loc[parmnum]) + : input_location; if (type == void_type_node) { @@ -3113,7 +3119,8 @@ convert_arguments (location_t loc, veclocation_t arg_loc, tree typelist, if (type == error_mark_node || !COMPLETE_TYPE_P (type)) { - error (type of formal parameter %d is incomplete, parmnum + 1); + error_at (ploc, type of formal parameter %d is incomplete, + parmnum + 1); parmval = val; } else @@ -3128,34 +3135,34 @@ convert_arguments (location_t loc, veclocation_t arg_loc, tree typelist, if (INTEGRAL_TYPE_P (type) TREE_CODE (valtype) == REAL_TYPE) - warning (0, passing argument %d of %qE as integer -rather than floating due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + integer rather than floating due to + prototype, argnum, rname); if (INTEGRAL_TYPE_P (type) TREE_CODE (valtype) == COMPLEX_TYPE) - warning (0, passing argument %d of %qE as integer -rather than complex due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + integer rather than complex due to + prototype, argnum, rname); else if (TREE_CODE (type) == COMPLEX_TYPE TREE_CODE (valtype) == REAL_TYPE) - warning (0, passing argument %d of %qE as complex -rather than floating due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + complex rather than floating due to + prototype, argnum, rname); else if (TREE_CODE (type) == REAL_TYPE INTEGRAL_TYPE_P (valtype)) - warning (0, passing argument %d of %qE as floating -rather than integer due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + floating rather than integer due to + prototype, argnum, rname); else if (TREE_CODE (type) == COMPLEX_TYPE INTEGRAL_TYPE_P (valtype)) - warning (0, passing argument %d of %qE as complex -rather than integer due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + complex rather than integer due to + prototype, argnum, 
rname); else if (TREE_CODE (type) == REAL_TYPE TREE_CODE (valtype) == COMPLEX_TYPE) - warning (0, passing argument %d of %qE as floating -rather than complex due to prototype, -argnum, rname); + warning_at (ploc, 0, passing argument %d of %qE as + floating rather than complex due to + prototype, argnum, rname); /*
[PATCH] PRE TLC
The following patch removes dead code (blocks are never defered because we iterate in a proper CFG order now) and avoids building up the el_avail vector one element at a time. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2014-09-02 Richard Biener rguent...@suse.de * tree-ssa-pre.c (alloc_expression_id): Use quick_grow_cleared. (struct bb_bitmap_sets): Remove deferred member. (BB_DEFERRED): Remove. (defer_or_phi_translate_block): Remove. (compute_antic_aux): Remove deferring of blocks, assert proper iteration order. (compute_antic): Do not set BB_DEFERRED. (eliminate): Allocate el_avail of proper size initially. Index: gcc/tree-ssa-pre.c === --- gcc/tree-ssa-pre.c.orig 2014-09-02 16:01:08.733146617 +0200 +++ gcc/tree-ssa-pre.c 2014-09-02 15:56:23.687166242 +0200 @@ -272,11 +272,10 @@ alloc_expression_id (pre_expr expr) { unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr)); /* vec::safe_grow_cleared allocates no headroom. Avoid frequent -re-allocations by using vec::reserve upfront. There is no -vec::quick_grow_cleared unfortunately. */ +re-allocations by using vec::reserve upfront. */ unsigned old_len = name_to_id.length (); name_to_id.reserve (num_ssa_names - old_len); - name_to_id.safe_grow_cleared (num_ssa_names); + name_to_id.quick_grow_cleared (num_ssa_names); gcc_assert (name_to_id[version] == 0); name_to_id[version] = expr-id; } @@ -427,10 +426,6 @@ typedef struct bb_bitmap_sets /* True if we have visited this block during ANTIC calculation. */ unsigned int visited : 1; - /* True we have deferred processing this block during ANTIC - calculation until its successor is processed. */ - unsigned int deferred : 1; - /* True when the block contains a call that might not return. 
*/ unsigned int contains_may_not_return_call : 1; } *bb_value_sets_t; @@ -444,7 +439,6 @@ typedef struct bb_bitmap_sets #define NEW_SETS(BB) ((bb_value_sets_t) ((BB)-aux))-new_sets #define EXPR_DIES(BB) ((bb_value_sets_t) ((BB)-aux))-expr_dies #define BB_VISITED(BB) ((bb_value_sets_t) ((BB)-aux))-visited -#define BB_DEFERRED(BB) ((bb_value_sets_t) ((BB)-aux))-deferred #define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) ((BB)-aux))-contains_may_not_return_call @@ -2085,26 +2079,6 @@ static sbitmap has_abnormal_preds; static sbitmap changed_blocks; -/* Decide whether to defer a block for a later iteration, or PHI - translate SOURCE to DEST using phis in PHIBLOCK. Return false if we - should defer the block, and true if we processed it. */ - -static bool -defer_or_phi_translate_block (bitmap_set_t dest, bitmap_set_t source, - basic_block block, basic_block phiblock) -{ - if (!BB_VISITED (phiblock)) -{ - bitmap_set_bit (changed_blocks, block-index); - BB_VISITED (block) = 0; - BB_DEFERRED (block) = 1; - return false; -} - else -phi_translate_set (dest, source, block, phiblock); - return true; -} - /* Compute the ANTIC set for BLOCK. If succs(BLOCK) 1 then @@ -2144,30 +2118,8 @@ compute_antic_aux (basic_block block, bo else if (single_succ_p (block)) { basic_block succ_bb = single_succ (block); - - /* We trade iterations of the dataflow equations for having to -phi translate the maximal set, which is incredibly slow -(since the maximal set often has 300+ members, even when you -have a small number of blocks). -Basically, we defer the computation of ANTIC for this block -until we have processed it's successor, which will inevitably -have a *much* smaller set of values to phi translate once -clean has been run on it. -The cost of doing this is that we technically perform more -iterations, however, they are lower cost iterations. 
- -Timings for PRE on tramp3d-v4: -without maximal set fix: 11 seconds -with maximal set fix/without deferring: 26 seconds -with maximal set fix/with deferring: 11 seconds - */ - - if (!defer_or_phi_translate_block (ANTIC_OUT, ANTIC_IN (succ_bb), - block, succ_bb)) - { - changed = true; - goto maybe_dump_sets; - } + gcc_assert (BB_VISITED (succ_bb)); + phi_translate_set (ANTIC_OUT, ANTIC_IN (succ_bb), block, succ_bb); } /* If we have multiple successors, we take the intersection of all of them. Note that in the case of loop exit phi nodes, we may have @@ -2187,20 +2139,11 @@ compute_antic_aux (basic_block block, bo worklist.quick_push (e-dest); } - /* Of multiple successors we have to have visited one already. */ - if (!first) - { - bitmap_set_bit (changed_blocks,
[PATCH][match-and-simplify] Add comparison patterns
The following patch adds more comparison patterns (with comments on what is missing still). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2014-09-02 Richard Biener rguent...@suse.de * fold-const.h (negate_expr_p): Declare. * fold-const.c (negate_expr_p): Export. * match-comparison.pd: Implement more comparison patterns. Index: gcc/match-comparison.pd === *** gcc/match-comparison.pd.orig2014-09-02 11:10:06.855348847 +0200 --- gcc/match-comparison.pd 2014-09-02 13:30:52.392767381 +0200 *** *** 1,3 --- 1,62 + /* From fold_binary. */ + + (simplify + (ne @0 integer_zerop@1) + (if (TREE_CODE (TREE_TYPE (@0)) == BOOLEAN_TYPE) + /* ??? In GENERIC the type of the comparison may be 'int'. */ + (convert @0))) + + /* Distribute operations in equality compares. */ + (for op in eq ne + /* -exp op CST is exp op -CST. */ + (simplify + (op (negate @0) INTEGER_CST@1) + /* ??? fix fold-const to use negate_expr_p */ + (if (negate_expr_p (@1)) +(op @0 (negate @1 + /* X ^ C1 == C2 is X == (C1 ^ C2). */ + (simplify + (op (bit_xor @0 INTEGER_CST@1) INTEGER_CST@2) + (op @0 (bit_xor @1 @2 + + /* From fold_comparison, in the order of transforms in there. */ + + /* Transform comparisons of the form X +- C1 CMP C2 to X CMP C2 -+ C1. */ + (for cmp in lt le eq ge gt ne + (for op in plus minus + (simplify +(cmp (op @0 INTEGER_CST@1) INTEGER_CST@2) +(if ((cmp == NE_EXPR || cmp == EQ_EXPR +|| TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))) +(@3 = int_const_binop (op == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, @2, @1)) + /* ??? fold_comparison here does, when @1 and @2 didn't have + TREE_OVERFLOW set, simplify the comparison to true/false + by using a staturated add. */ +!TREE_OVERFLOW (@3)) + (cmp @0 @3) + + /* Transform comparisons of the form X - Y CMP 0 to X CMP Y. */ + /* ??? 
The transformation is valid for the other operators if overflow +is undefined for the type, but performing it here badly interacts +with the transformation in fold_cond_expr_with_comparison which +attempts to synthetize ABS_EXPR. */ + (for cmp in eq ne + (simplify + (cmp (minus @0 @1) integer_zerop) + (cmp @0 @1))) + + /* For comparisons of pointers we can decompose it to a compile time +comparison of the base objects and the offsets into the object. +This requires at least one operand being an ADDR_EXPR or a +POINTER_PLUS_EXPR to do more than the operand_equal_p test below. */ + #if 0 + (for cmp in lt le eq ge gt ne + (for op in addr pointer_plus + (simplify +(cmp:c (op@0 @1 @2) @3) +(if (simplify_addr_comparison (@0, @3, @@)) + #endif + /* Simplify X * C1 CMP 0 to X CMP 0 if C1 is not zero. */ (for op in lt le eq ne ge gt (simplify *** *** 13,15 --- 72,141 (if (tree_int_cst_sgn (@1) 0) (op @2 @0) + #if 0 + /* If this is comparing a constant with a MIN_EXPR or a MAX_EXPR of a +constant, we can simplify it. */ + (for op in min max + (for cmp in eq gt + (cmp (op @0 INTEGER_CST@1) INTEGER_CST@2) + (if (op == MAX_EXPR tree_int_cst_compare (@1, @2) == 0) +(le @0 @2)) + (if ( + ) +/* ??? optimize_minmax_comparison handles ne, lt and le by + recursing with an inverted comparison and then inverting + the result. Or combining equality and gt with truth_or. */) + #endif + + /* Simplify comparison of something with itself. For IEEE +floating-point, we can only do some of these simplifications. */ + (for cmp in ge le + (simplify + (cmp @0 @0) + (eq @0 @0))) + (simplify + (eq @0 @0) + (if (! FLOAT_TYPE_P (TREE_TYPE (@0)) + || ! HONOR_NANS (TYPE_MODE (TREE_TYPE (@0 + { constant_boolean_node (true, type); })) + (for cmp in ne gt lt + (simplify + (cmp @0 @0) + (if (cmp != NE_EXPR +|| ! FLOAT_TYPE_P (TREE_TYPE (@0)) +|| ! HONOR_NANS (TYPE_MODE (TREE_TYPE (@0 +{ constant_boolean_node (false, type); }))) + + /* Need to split up the cases in twoval_comparison_p. 
*/ + + #if 0 + /* We can fold X/C1 op C2 where C1 and C2 are integer constants +into a single range test. */ + (for cmp in lt le eq ge gt ne + (for div in trunc_div exact_div + (simplify +(cmp (div @0 INTEGER_CST@1) INTEGER_CST) +(if (!integer_zerop (@1)) +/* ??? Need to think about what fold_div_compare does. IMHO + we can unconditionally build a + (unsigned)@0 +- CST = CST' + range check. */ + + #endif + + /* Fold ~X op ~Y as Y op X. */ + (for cmp in lt le eq ge gt ne + (simplify + (cmp (bit_not @0) (bit_not @1)) + (cmp @1 @0))) + + /* Fold ~X op C as X op' ~C, where op' is the swapped comparison. */ + (for cmp in lt le eq ge gt ne + (simplify + (cmp (bit_not @0) @1) + /* ??? (for cst in INTEGER_CST
Re: [C++ Patch] DR 1453
OK. Jason
Re: [C++ Patch] PR 58102 aka DR 1405
Hi, On 09/02/2014 04:11 PM, Jason Merrill wrote: On 09/01/2014 09:47 AM, Paolo Carlini wrote: -constexpr A b = a;// { dg-error mutable } +constexpr A b = a; This is wrong; we still need to get an error here. Hum, interesting. Neither current EDG nor current clang error out there. Let's see if I can tease the case out... Thanks, Paolo.
Re: [C++ Patch] PR 58102 aka DR 1405
On 09/01/2014 09:47 AM, Paolo Carlini wrote: -constexpr A b = a; // { dg-error mutable } +constexpr A b = a; This is wrong; we still need to get an error here. Jason
Re: [C++ Patch] PR 58102 aka DR 1405
On 09/02/2014 10:17 AM, Paolo Carlini wrote: Let's see if I can tease the case out... I think you need to leave that hunk alone, and instead fix the new testcase by treating = {} more like {}, just as we already don't require a copy constructor call for copy-list-initialization. Jason
Re: Enable EBX for x86 in 32bits PIC code
On 08/29/2014 02:47 AM, Ilya Enkovich wrote: Seems your patch doesn't cover all cases. Attached is a modified patch (with your changes included) and a test where double constant is wrongly rematerialized. I also see in ira dump that there is still a copy of PIC reg created: Initialization of original PIC reg: (insn 23 22 24 2 (set (reg:SI 127) (reg:SI 3 bx)) test.cc:42 90 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 3 bx) (nil))) ... Copy is created: (insn 135 37 25 3 (set (reg:SI 138 [127]) (reg:SI 127)) 90 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 127) (nil))) ... Copy is used: (insn 119 25 122 3 (set (reg:DF 134) (mem/u/c:DF (plus:SI (reg:SI 138 [127]) (const:SI (unspec:SI [ (symbol_ref/u:SI (*.LC0) [flags 0x2]) ] UNSPEC_GOTOFF))) [5 S8 A64])) 128 {*movdf_internal} (expr_list:REG_EQUIV (const_double:DF 2.9997371893933895137251965934410691261292e-4 [0x0.9d495182a99308p-11]) (nil))) After reload we have new usage of r127 which is allocated to ecx which actually does not have any definition in this function at all. (insn 151 42 44 4 (set (reg:SI 0 ax [147]) (plus:SI (reg:SI 2 cx [127]) (const:SI (unspec:SI [ (symbol_ref/u:SI (*.LC0) [flags 0x2]) ] UNSPEC_GOTOFF test.cc:44 213 {*leasi} (expr_list:REG_EQUAL (symbol_ref/u:SI (*.LC0) [flags 0x2]) (nil))) (insn 44 151 45 4 (set (reg:DF 21 xmm0 [orig:129 D.2450 ] [129]) (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128]) (mem/u/c:DF (reg:SI 0 ax [147]) [5 S8 A64]))) test.cc:44 790 {*fop_df_comm_sse} (expr_list:REG_EQUAL (mult:DF (reg:DF 21 xmm0 [orig:128 D.2450 ] [128]) (const_double:DF 2.9997371893933895137251965934410691261292e-4 [0x0.9d495182a99308p-11])) (nil))) Compilation string: g++ -m32 -O2 -mfpmath=sse -fPIE -S test.cc Ok, Ilya. I'll look at the problem this week.
Re: [C++ Patch] PR 58102 aka DR 1405
Hi, On 09/02/2014 04:28 PM, Jason Merrill wrote: On 09/02/2014 10:17 AM, Paolo Carlini wrote: Let's see if I can tease the case out... I think you need to leave that hunk alone, and instead fix the new testcase by treating = {} more like {}, just as we already don't require a copy constructor call for copy-list-initialization. I see. Thanks a lot for the tip! Paolo.
Re: [PATCH AArch64] Rename [u]int32x1_t to [u]int32_t (resp 16x1, 8x1) in arm_neon.h
On 24 July 2014 11:18, Alan Lawrence alan.lawre...@arm.com wrote: The ACLE spec does not mention the int32x1_t, uint32x1_t, int16x1_t, uint16x1_t, int8x1_t or uint8x1_t types currently in arm_neon.h, but just 'standard' types int32_t, int16_t, etc. This patch is a global search-and-replace across arm_neon.h (and the tests that depend on it). Regressed (check-gcc and check-g++) on aarch64-none-elf. OK for trunk. The question of backporting to 4.9 has been raised internally. There is no ABI issue, as int32x1_t was merely a typedef to int32_t (etc.). However there is a source code compatibility issue; code mentioning the 32x1 types, i.e. not conforming to the ACLE spec, which previously compiled, will no longer do so. My personal feeling is therefore not to backport this, but I would welcome input from maintainers (and others)...? I doubt that there is currently much code out there that will be affected by this change and that it would be better to back port and hence limit the amount of code written against the broken arm_neon.h during the life of the 4.9.x series. If there are no objections to back porting in the next couple of days then go ahead. /Marcus
Re: [C/C++ PATCH] Allow __atomic_always_lock_free in a static assert (PR c/62024)
On Wed, Aug 27, 2014 at 03:06:38PM -0400, Jason Merrill wrote: On 08/25/2014 07:43 AM, Marek Polacek wrote: * semantics.c (finish_static_assert): Strip no-op conversions. I think I'd rather strip these in cxx_eval_builtin_function_call so that we don't have to deal with them in various consumers. I was playing with this again today and I've found out that I actually don't need to touch C++ FE at all; maybe_constant_value returns integer_cst in this case. Don't know how I flubbed that. Given that the C part are approved, I'm going to commit the following. Bootstrapped/regtested on x86_64-linux. 2014-09-02 Marek Polacek pola...@redhat.com PR c/62024 * c-parser.c (c_parser_static_assert_declaration_no_semi): Strip no-op conversions. * g++.dg/cpp0x/pr62024.C: New test. * gcc.dg/pr62024.c: New test. diff --git gcc/c/c-parser.c gcc/c/c-parser.c index d634bb1..fc7bbaf 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -2058,6 +2058,8 @@ c_parser_static_assert_declaration_no_semi (c_parser *parser) if (TREE_CODE (value) != INTEGER_CST) { value = c_fully_fold (value, false, NULL); + /* Strip no-op conversions. 
*/ + STRIP_TYPE_NOPS (value); if (TREE_CODE (value) == INTEGER_CST) pedwarn (value_loc, OPT_Wpedantic, expression in static assertion is not an integer constant expression); diff --git gcc/testsuite/g++.dg/cpp0x/pr62024.C gcc/testsuite/g++.dg/cpp0x/pr62024.C index e69de29..5f0640a 100644 --- gcc/testsuite/g++.dg/cpp0x/pr62024.C +++ gcc/testsuite/g++.dg/cpp0x/pr62024.C @@ -0,0 +1,7 @@ +// PR c/62024 +// { dg-do compile { target c++11 } } +// { dg-require-effective-target sync_char_short } + +int *p; +static_assert (__atomic_always_lock_free (1, p), ); +static_assert (__atomic_always_lock_free (1, 0), ); diff --git gcc/testsuite/gcc.dg/pr62024.c gcc/testsuite/gcc.dg/pr62024.c index e69de29..79a0b79 100644 --- gcc/testsuite/gcc.dg/pr62024.c +++ gcc/testsuite/gcc.dg/pr62024.c @@ -0,0 +1,8 @@ +/* PR c/62024 */ +/* { dg-do compile } */ +/* { dg-options -std=gnu11 -Wpedantic } */ +/* { dg-require-effective-target sync_char_short } */ + +int *p; +_Static_assert (__atomic_always_lock_free (1, p), ); /* { dg-warning is not an integer constant } */ +_Static_assert (__atomic_always_lock_free (1, 0), ); /* { dg-warning is not an integer constant } */ Marek
Re: [PATCH] Force rtl templates to be inlined
On Tue, 2014-09-02 at 00:03 -0700, Andi Kleen wrote: From: Andi Kleen a...@linux.intel.com I noticed that with the trunk compiler a range of the new rtl inlines show up as hot in a profiler during stage1. I think that happens because stage1 is not using optimization and does not inline plain inline. And these rtl inlines are very frequently called. Sorry about that. FWIW I'm working on some followup patches for the rtx-classes work that ought to eliminate some of the is_a_helper calls; I hope to post them in the next few days. [1] I suspect the bulk of them currently are coming from the safe_as_a rtx_insn * calls within NEXT_INSN and PREV_INSN; do you happen to have information handy on that? Dave [1] (I have to take the rest of today off for a family matter).
Re: [PATCH] Add -fno-instrument-function
Hmm, why not make -no-pg (does that exist?) and/or -mno-fentry I'm not sure. do this? That is, I don't see the need for a new option. That would be really odd behavior. A yes/no option whose default is controlled by other object files' command line. And -pg would be for all files in LTO, and -no-pg only for that file, so it would not be symmetric. I think an explicit different option has far cleaner semantics for now (at least until the LTO option mess can be properly cleaned up). -Andi
Re: please verify my mail to community.
Hi all! Here's a simple optimization patch for Asan. It stores alignment information into ASAN_CHECK which is then extracted by sanopt to reduce number of and 0x7 instructions for sufficiently aligned accesses. I checked it on linux kernel by comparing results of objdump -d -j .text vmlinux | grep and.*0x7, for optimized and regular cases. It eliminates 12% of and 0x7's. No regressions. Sanitized GCC was successfully Asan-bootstrapped. No false positives were found in kernel. --Marat gcc/ChangeLog: 2014-09-02 Marat Zakirov m.zaki...@samsung.com * asan.c (build_check_stmt): Alignment arg was added. (asan_expand_check_ifn): Optimization for alignment = 8. gcc/testsuite/ChangeLog: 2014-09-02 Marat Zakirov m.zaki...@samsung.com * c-c++-common/asan/red-align-1.c: New test. * c-c++-common/asan/red-align-2.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index 58e7719..aed5ede 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len, if (end_instrumented) flags |= ASAN_CHECK_END_INSTRUMENTED; - g = gimple_build_call_internal (IFN_ASAN_CHECK, 3, + g = gimple_build_call_internal (IFN_ASAN_CHECK, 4, build_int_cst (integer_type_node, flags), - base, len); + base, len, + build_int_cst (integer_type_node, + align/BITS_PER_UNIT)); gimple_set_location (g, loc); if (before_p) gsi_insert_before (gsi, g, GSI_SAME_STMT); @@ -2434,6 +2436,7 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) tree base = gimple_call_arg (g, 1); tree len = gimple_call_arg (g, 2); + HOST_WIDE_INT align = tree_to_shwi (gimple_call_arg (g, 3)); HOST_WIDE_INT size_in_bytes = is_scalar_access tree_fits_shwi_p (len) ? 
tree_to_shwi (len) : -1; @@ -2547,7 +2550,10 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) gimple shadow_test = build_assign (NE_EXPR, shadow, 0); gimple_seq seq = NULL; gimple_seq_add_stmt (seq, shadow_test); - gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, base_addr, 7)); + /* Aligned (= 8 bytes) access do not need 7. */ + if (align 8) + gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, + base_addr, 7)); gimple_seq_add_stmt (seq, build_type_cast (shadow_type, gimple_seq_last (seq))); if (real_size_in_bytes 1) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 7ae60f3..54ade9f 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -55,4 +55,4 @@ DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL) DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W..) +DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W...) diff --git a/gcc/testsuite/c-c++-common/asan/red-align-1.c b/gcc/testsuite/c-c++-common/asan/red-align-1.c new file mode 100644 index 000..1edb3a2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c @@ -0,0 +1,20 @@ +/* This tests aligment propagation to structure elem and + abcense of redudant 7. 
*/ + +/* { dg-options -fdump-tree-sanopt } */ +/* { dg-do compile } */ +/* { dg-skip-if { *-*-* } { -flto } { } } */ + +struct st { + int a; + int b; + int c; +} __attribute__((aligned(16))); + +int foo (struct st * s_p) +{ + return s_p-a; +} + +/* { dg-final { scan-tree-dump-times 7 0 sanopt } } */ +/* { dg-final { cleanup-tree-dump sanopt } } */ diff --git a/gcc/testsuite/c-c++-common/asan/red-align-2.c b/gcc/testsuite/c-c++-common/asan/red-align-2.c new file mode 100644 index 000..161fe3c --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c @@ -0,0 +1,20 @@ +/* This tests aligment propagation to structure elem and + abcense of redudant 7. */ + +/* { dg-options -fdump-tree-sanopt } */ +/* { dg-do compile } */ +/* { dg-skip-if { *-*-* } { -flto } { } } */ + +struct st { + int a; + int b; + int c; +} __attribute__((aligned(16))); + +int foo (struct st * s_p) +{ + return s_p-b; +} + +/* { dg-final { scan-tree-dump-times 7 1 sanopt } } */ +/* { dg-final { cleanup-tree-dump sanopt } } */
Re: [C++ Patch] PR 58102 aka DR 1405
Hi again, On 09/02/2014 04:28 PM, Jason Merrill wrote: On 09/02/2014 10:17 AM, Paolo Carlini wrote: Let's see if I can tease the case out... I think you need to leave that hunk alone, and instead fix the new testcase by treating = {} more like {}, just as we already don't require a copy constructor call for copy-list-initialization. By the way, now I really understand the DR (the wording in the resolution clarifies what we are *already* doing correctly!). Anyway, what about the below? Certainly works for the tests which we have got. Thanks, Paolo. Index: cp/semantics.c === --- cp/semantics.c (revision 214808) +++ cp/semantics.c (working copy) @@ -9859,11 +9859,14 @@ cxx_eval_outermost_constant_expr (tree t, bool all verify_constant (r, allow_non_constant, non_constant_p, overflow_p); if (TREE_CODE (t) != CONSTRUCTOR + (TREE_CODE (t) != TARGET_EXPR + || TREE_CODE (TARGET_EXPR_INITIAL (t)) != AGGR_INIT_EXPR) cp_has_mutable_p (TREE_TYPE (t))) { /* We allow a mutable type if the original expression was a CONSTRUCTOR so that we can do aggregate initialization of -constexpr variables. */ +constexpr variables. Likewise for TARGET_EXPRs with an +AGGR_INIT_EXPR as TARGET_EXPR_INITIAL (c++/58102). */ if (!allow_non_constant) error (%qT cannot be the type of a complete constant expression because it has mutable sub-objects, TREE_TYPE (t)); Index: testsuite/g++.dg/cpp0x/constexpr-mutable2.C === --- testsuite/g++.dg/cpp0x/constexpr-mutable2.C (revision 0) +++ testsuite/g++.dg/cpp0x/constexpr-mutable2.C (working copy) @@ -0,0 +1,10 @@ +// DR 1405, PR c++/58102 +// { dg-do compile { target c++11 } } + +struct S { + mutable int n; + constexpr S() : n() {} +}; + +constexpr S s1 {}; +constexpr S s2 = {};
Re: [PATCH AArch64 1/3] Don't disparage add/sub in SIMD registers
On 18 August 2014 17:50, Alan Lawrence alan.lawre...@arm.com wrote: Well, you're right that it could be. So I presented the wrong justification. Clearly we would benefit from some better cost infrastructure here, ideally that is expressive, taken into account at all appropriate stages of the compiler, and tunable per core. I imagine that steps (patches) towards such infrastructure would be welcomed by both AArch64 maintainers and more widely. In the meantime, however, we must work with what we have. I'll still argue that we should remove the '!' (as per patch), however. As James has said, even if your add is more expensive in SIMD registers, the '!' still doesn't express that; and leaving it in affects code-generation on all cores. And it is inconsistent with other instructions. Agreed and OK. /Marcus
[PATCH] Asan optimization for aligned accesses.
Sorry for wrong subject! On 09/02/2014 07:03 PM, Marat Zakirov wrote: Hi all! Here's a simple optimization patch for Asan. It stores alignment information into ASAN_CHECK which is then extracted by sanopt to reduce number of and 0x7 instructions for sufficiently aligned accesses. I checked it on linux kernel by comparing results of objdump -d -j .text vmlinux | grep and.*0x7, for optimized and regular cases. It eliminates 12% of and 0x7's. No regressions. Sanitized GCC was successfully Asan-bootstrapped. No false positives were found in kernel. --Marat gcc/ChangeLog: 2014-09-02 Marat Zakirov m.zaki...@samsung.com * asan.c (build_check_stmt): Alignment arg was added. (asan_expand_check_ifn): Optimization for alignment = 8. gcc/testsuite/ChangeLog: 2014-09-02 Marat Zakirov m.zaki...@samsung.com * c-c++-common/asan/red-align-1.c: New test. * c-c++-common/asan/red-align-2.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index 58e7719..aed5ede 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1639,9 +1639,11 @@ build_check_stmt (location_t loc, tree base, tree len, if (end_instrumented) flags |= ASAN_CHECK_END_INSTRUMENTED; - g = gimple_build_call_internal (IFN_ASAN_CHECK, 3, + g = gimple_build_call_internal (IFN_ASAN_CHECK, 4, build_int_cst (integer_type_node, flags), - base, len); + base, len, + build_int_cst (integer_type_node, + align/BITS_PER_UNIT)); gimple_set_location (g, loc); if (before_p) gsi_insert_before (gsi, g, GSI_SAME_STMT); @@ -2434,6 +2436,7 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) tree base = gimple_call_arg (g, 1); tree len = gimple_call_arg (g, 2); + HOST_WIDE_INT align = tree_to_shwi (gimple_call_arg (g, 3)); HOST_WIDE_INT size_in_bytes = is_scalar_access tree_fits_shwi_p (len) ? 
tree_to_shwi (len) : -1; @@ -2547,7 +2550,10 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) gimple shadow_test = build_assign (NE_EXPR, shadow, 0); gimple_seq seq = NULL; gimple_seq_add_stmt (seq, shadow_test); - gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, base_addr, 7)); + /* Aligned (= 8 bytes) access do not need 7. */ + if (align 8) + gimple_seq_add_stmt (seq, build_assign (BIT_AND_EXPR, + base_addr, 7)); gimple_seq_add_stmt (seq, build_type_cast (shadow_type, gimple_seq_last (seq))); if (real_size_in_bytes 1) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 7ae60f3..54ade9f 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -55,4 +55,4 @@ DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL) DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W..) +DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, .W...) diff --git a/gcc/testsuite/c-c++-common/asan/red-align-1.c b/gcc/testsuite/c-c++-common/asan/red-align-1.c new file mode 100644 index 000..1edb3a2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/red-align-1.c @@ -0,0 +1,20 @@ +/* This tests aligment propagation to structure elem and + abcense of redudant 7. 
*/ + +/* { dg-options -fdump-tree-sanopt } */ +/* { dg-do compile } */ +/* { dg-skip-if { *-*-* } { -flto } { } } */ + +struct st { + int a; + int b; + int c; +} __attribute__((aligned(16))); + +int foo (struct st * s_p) +{ + return s_p-a; +} + +/* { dg-final { scan-tree-dump-times 7 0 sanopt } } */ +/* { dg-final { cleanup-tree-dump sanopt } } */ diff --git a/gcc/testsuite/c-c++-common/asan/red-align-2.c b/gcc/testsuite/c-c++-common/asan/red-align-2.c new file mode 100644 index 000..161fe3c --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/red-align-2.c @@ -0,0 +1,20 @@ +/* This tests aligment propagation to structure elem and + abcense of redudant 7. */ + +/* { dg-options -fdump-tree-sanopt } */ +/* { dg-do compile } */ +/* { dg-skip-if { *-*-* } { -flto } { } } */ + +struct st { + int a; + int b; + int c; +} __attribute__((aligned(16))); + +int foo (struct st * s_p) +{ + return s_p-b; +} + +/* { dg-final { scan-tree-dump-times 7 1 sanopt } } */ +/* { dg-final { cleanup-tree-dump sanopt } } */
Re: [PATCH AArch64 2/3] Add SIMD-reg variants of logical operators and/ior/xor/not
On 12 August 2014 15:43, Alan Lawrence alan.lawre...@arm.com wrote: This patch adds SIMD register variants for and, ior, xor and not - similarly to add/sub, the H/W supports it, and it'll be more efficient if the values are there already, e.g. if passed as [u]int64x1_t parameters. gcc/ChangeLog: * config/aarch64/aarch64.md (<optab><mode>3, one_cmpl<mode>2): Add SIMD-register variant. * config/aarch64/iterators.md (Vbtype): Add value for SI. OK /Marcus
Re: [PATCH AArch64 3/3] Fix XOR_one_cmpl pattern; add SIMD-reg variants for BIC,ORN,EON
On 12 August 2014 15:55, Alan Lawrence alan.lawre...@arm.com wrote: gcc/ChangeLog: * config/aarch64/aarch64.c (<LOGICAL:optab>_one_cmpl<mode>3): Reparameterize to... (<NLOGICAL:optab>_one_cmpl<mode>3): with extra SIMD-register variant. (xor_one_cmpl<mode>3): New define_insn_and_split. * config/aarch64/iterators.md (NLOGICAL): New define_code_iterator. gcc/testsuite/ChangeLog: * gcc.target/aarch64/eon_1.c: New test. OK /Marcus
Re: [PATCH AArch64 1/2] Improve codegen of vector compares inc. tst instruction
On 19 August 2014 11:44, Alan Lawrence alan.lawre...@arm.com wrote: gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers, TYPES_TST): Define. (aarch64_fold_builtin): Update pattern for cmtst. * config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p): Declare. * config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers. * config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>): Switch operands, separate out more cases, refactor. (aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1). * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single argument; rename old version to... (aarch64_const_vec_all_same_in_range_p): ...this. (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming. * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/int_comparisons.x: New file. * gcc.target/aarch64/simd/int_comparisons_1.c: New test. * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto. OK /Marcus
Re: [PATCH AArch64 2/2] Remove vector compare/tst __builtins
On 19 August 2014 14:43, Alan Lawrence alan.lawre...@arm.com wrote: gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code handling cmge, cmgt, cmeq, cmtst. * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle, cmlt, cmgeu, cmgtu, cmtst): Remove. * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*, vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*, vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*, vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions. OK /Marcus
[PATCH] aarch64: Enable Neon search_line_fast
Is it intentional or not that AArch64 does not define __ARM_NEON__? Otherwise, here's a better way to fold the test bits. AArch64 of course does not have dN+1 overlap the high part of the qM register, like AArch32, so the current l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); implies extra register moves. But on the good side, the armv8 ADDV instruction allows two instructions to be removed from this fast path. When built for 32-bit, the new form results in the same instruction count; we simply keep using q registers instead of d registers for two more insns. Given that there are currently ifdefs involved, it would certainly be possible to keep the 32-bit path unchanged, if that's thought to be valuable. I did wonder if the armv8 stuff was supposed to be included in the AArch32 arm_neon.h? Is it just an oversight that it's missing? r~ * lex.c (search_line_fast) [__ARM_NEON]: Use __FOO not __FOO__ to detect neon support. Fold the comparison using ADDV when available. diff --git a/libcpp/lex.c b/libcpp/lex.c index 5366dad..6d1823e 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -638,7 +638,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) } } -#elif defined (__ARM_NEON__) +#elif defined (__ARM_NEON) #include arm_neon.h static const uchar * @@ -649,6 +649,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) const uint8x16_t repl_bs = vdupq_n_u8 ('\\'); const uint8x16_t repl_qm = vdupq_n_u8 ('?'); const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL); + const int16x8_t shift = { 0, 0, 0, 0, 8, 8, 8, 8 }; unsigned int misalign, found, mask; const uint8_t *p; @@ -670,10 +671,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) do { - uint8x8_t l; - uint16x4_t m; - uint32x2_t n; uint8x16_t t, u, v, w; + uint16x8_t l; p += 16; data = vld1q_u8 (p); @@ -685,12 +684,24 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); w = 
vorrq_u8 (u, vceqq_u8 (data, repl_qm)); t = vandq_u8 (vorrq_u8 (v, w), xmask); - l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); - m = vpaddl_u8 (l); - n = vpaddl_u16 (m); - - found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n, - vshr_n_u64 ((uint64x1_t) n, 24)), 0); + + l = vpaddlq_u8 (t); + l = vshlq_u16 (l, shift); + + /* ??? Ideally, this would be if (__ARM_ARCH >= 8) since the ADDV insn +reduces the instruction count by two. But vaddvq is not present in +the arm32 arm_neon.h, nor does AArch64 define __ARM_ARCH. */ +#ifdef __aarch64__ + found = vaddvq_u16 (l); +#else + { + uint32x4_t m = vpaddlq_u16 (l); + uint64x2_t n = vpaddlq_u32 (m); + uint64x1_t o = vget_low_u64 (n) + vget_high_u64 (n); + found = vget_lane_u32 ((uint32x2_t)o, 0); + } +#endif + found &= mask; } while (!found); -- 1.9.3
Re: [FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)
Marek Polacek wrote: This patch fixes the last two spots where -Wlogical-not-parentheses warns. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3 if you want more info about the changes. Bootstrapped/regtested on x86_64-linux, ok for trunk? Looks good to me. Thanks for the patch! Tobias 2014-09-02 Marek Polacek pola...@redhat.com PR fortran/62270 * interface.c (compare_parameter): Fix condition. * trans-expr.c (gfc_conv_procedure_call): Likewise. * gfortran.dg/pointer_intent_7.f90: Adjust dg-error.
[PATCH][ARM] Fix %N output modifier
Hi all, Following the transition to UAL I noticed that the %N output modifier doesn't really work. It calls fp_const_from_val to get the VFP encoding from a real value, but fp_const_from_val only supports the floating point zero constant and ICEs for all other values, making it useless for pretty much all purposes. For example, the testcase in this patch ICEs. With the conversion to UAL we no longer output the VFP encoded form of floating point constants but rather their natural representation. This patch makes sure that %N negates its operand properly and outputs it as a normal floating point number. It also handles operand lossage (if, for example, the user passed in a register instead of a constant). Ok for upstream? 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/arm/arm.c (fp_const_from_val): Delete prototype and definition. (arm_print_operand): Don't use fp_const_from_val in the 'N' case. Report unsupported operand. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * gcc.target/arm/n_output_modifier_1.c: New test.commit 592aa40e87285c53229ccf544691611e9c78b578 Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Wed Aug 13 14:08:02 2014 +0100 [ARM] Fix %N output modifier diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 5f7cbb1..a48ca4e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -100,7 +100,6 @@ static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update static void arm_print_operand (FILE *, rtx, int); static void arm_print_operand_address (FILE *, rtx); static bool arm_print_operand_punct_valid_p (unsigned char code); -static const char *fp_const_from_val (REAL_VALUE_TYPE *); static arm_cc get_arm_condition_code (rtx); static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); static const char *output_multi_immediate (rtx *, const char *, const char *, @@ -17534,17 +17533,6 @@ arm_reorg (void) /* Routines to output assembly language. */ -/* Return string representation of passed in real value. 
*/ -static const char * -fp_const_from_val (REAL_VALUE_TYPE *r) -{ - if (!fp_consts_inited) -init_fp_table (); - - gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0)); - return 0; -} - /* OPERANDS[0] is the entire list of insns that constitute pop, OPERANDS[1] is the base register, RETURN_PC is true iff return insn is in the list, UPDATE is true iff the list contains explicit @@ -21533,13 +21521,21 @@ arm_print_operand (FILE *stream, rtx x, int code) case 'N': { - REAL_VALUE_TYPE r; - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - r = real_value_negate (r); - fprintf (stream, %s, fp_const_from_val (r)); - } - return; +if (CONST_DOUBLE_P (x)) + { +char fpstr[20]; +REAL_VALUE_TYPE r; + +REAL_VALUE_FROM_CONST_DOUBLE (r, x); +r = real_value_negate (r); +real_to_decimal (fpstr, r, sizeof (fpstr), 0, 1); +fprintf (stream, %s, fpstr); + } +else + output_operand_lossage (Unsupported operand for code '%c', code); +return; + } /* An integer or symbol address without a preceding # sign. */ case 'c': switch (GET_CODE (x)) diff --git a/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c b/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c new file mode 100644 index 000..e94914b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/n_output_modifier_1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_vfp_ok } */ +/* { dg-final { scan-assembler-times -5.0e-1 1 } } */ + + +int +foo (int a) +{ + int result = 0; + __asm__ (%0, %N1 +: =r(result) +: Dt(0.5) +: ); + return result; +} +
[PATCH][ARM] Fix up vectoriser dumping and scanning in some tests
Hi all, I noticed for some reason that these tests don't properly dump the vectoriser pass before scanning, but it doesn't show up because the corresponding target predicate in the scan-tree-dump directive was never true on arm! I think these tests were initially supposed to go somewhere in the midend but ended up being in gcc.target/arm/. Since they're in gcc.target/arm, just add the ARMv8 NEON options and expect it to always vectorise. These tests pass on arm just fine with these changes. Ok for trunk? 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * gcc.target/arm/vect-rounding-btruncf.c: Dump vectoriser output. Remove restriction on tree dump scan. * gcc.target/arm/vect-rounding-ceilf.c: Likewise. * gcc.target/arm/vect-rounding-floorf.c: Likewise. * gcc.target/arm/vect-rounding-roundf.c: Likewise.commit 0a1ce57744f00efd14b93b8f6decb6d5eff20c66 Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Fri Aug 22 17:25:29 2014 +0100 [ARM] Fix testsuite flags for vect tests diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c index ff033d4..5616837 100644 --- a/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c +++ b/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_v8_neon_ok } */ -/* { dg-options -O2 -ffast-math -ftree-vectorize } */ +/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */ /* { dg-add-options arm_v8_neon } */ #define N 32 @@ -14,5 +14,5 @@ foo (float *output, float *input) output[i] = __builtin_truncf (input[i]); } -/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_btruncf } } } */ +/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */ /* { dg-final { cleanup-tree-dump vect } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c index b54f358..cb8f1d5 100644 --- 
a/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c +++ b/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_v8_neon_ok } */ -/* { dg-options -O2 -ffast-math -ftree-vectorize } */ +/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */ /* { dg-add-options arm_v8_neon } */ #define N 32 @@ -14,5 +14,5 @@ foo (float *output, float *input) output[i] = __builtin_ceilf (input[i]); } -/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_ceilf } } } */ +/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */ /* { dg-final { cleanup-tree-dump vect } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c index 02e188d..bf68af7 100644 --- a/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c +++ b/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_v8_neon_ok } */ -/* { dg-options -O2 -ffast-math -ftree-vectorize } */ +/* { dg-options -O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all } */ /* { dg-add-options arm_v8_neon } */ #define N 32 @@ -14,5 +14,5 @@ foo (float *output, float *input) output[i] = __builtin_floorf (input[i]); } -/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_floorf } } } */ +/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */ /* { dg-final { cleanup-tree-dump vect } } */ diff --git a/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c b/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c index 85e2058..7c0a1b4 100644 --- a/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c +++ b/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_v8_neon_ok } */ -/* { dg-options -O2 -ffast-math -ftree-vectorize } */ +/* { dg-options -O2 -ffast-math 
-ftree-vectorize -fdump-tree-vect-all } */ /* { dg-add-options arm_v8_neon } */ #define N 32 @@ -14,5 +14,5 @@ foo (float *output, float *input) output[i] = __builtin_roundf (input[i]); } -/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect { target vect_call_roundf } } } */ +/* { dg-final { scan-tree-dump-times vectorized 1 loops 1 vect } } */ /* { dg-final { cleanup-tree-dump vect } } */
Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions
Hi Richard, Sorry for the delay. On 19/08/14 17:09, Richard Henderson wrote: (define_special_predicate cc_register_zero (match_code reg) { return (REGNO (op) == CC_REGNUM (GET_MODE (op) == CCmode || GET_MODE (op) == CC_Zmode || GET_MODE (op) == CC_NZmode)); }) ... and now that I read the backend more closely, I see _zero was a bad name. But more importantly, I see no connection between the comparison used and the CCmode being accepted. And if we fix that, why are you restricting to just Z and NZ? What's wrong with e.g. CFPmode? I'm not sure why restricted the modes for csinc. In the i386 backend, we check comparison+mode correspondence like (match_operator 4 ix86_carry_flag_operator [(match_operand 3 flags_reg_operand) (const_int 0)]) I think you'll want something similar. In the case of CSINC, we can accept all conditions, so let's start with the most general: (match_operator:GPI 2 aarch64_comparison_operation [(reg CC_REGNUM) (const_int 0)] or even (match_operand:GPI 2 aarch64_comparison_operation ) with (define_predicate aarch64_comparison_operation (match_code eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, unordered,ordered,unlt,unle,unge,ungt) { if (XEXP (op, 1) != const0_rtx) return false; rtx op0 = XEXP (op, 0); if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) return false; return aarch64_get_condition_code (op) = 0; }) where aarch64_get_condition_code is (1) exported (2) adjusted to return int not unsigned (3) adjusted to not abort, but return -1 for invalid combinations. and the two existing users of aarch64_get_condition_code are adjusted to gcc_assert that the return value is valid. Implementing that seems to work fine. Bootstrap and testing were successful. How's this version then? Kyrill 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/aarch64/predicates.md (aarch64_comparison_operation): New special predicate. * config/aarch64/aarch64.md (*csinc2mode_insn): Use aarch64_comparison_operation instead of matching an operator. Update operand numbers. 
(csinc3mode_insn): Likewise. (*csinv3mode_insn): Likewise. (*csneg3mode_insn): Likewise. (ffsmode2): Update gen_csinc3mode_insn callsite. * config/aarch64/aarch64.c (aarch64_get_condition_code): Export. Return -1 instead of aborting on invalid condition codes. (aarch64_print_operand): Update aarch64_get_condition_code callsites to assert that the returned condition code is valid. r~ commit a70dc696b967196d6662479a44682e3f423377ac Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Mon Aug 4 16:49:24 2014 +0100 [AArch64] Generalise condition code usage for csinc pattterns diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index b5335bf..d3be619 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -174,6 +174,7 @@ struct tune_params }; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); +int aarch64_get_condition_code (rtx); bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); bool aarch64_cannot_change_mode_class (enum machine_mode, enum machine_mode, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ba45d00..809d562 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3589,7 +3589,7 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) return CCmode; } -static unsigned +int aarch64_get_condition_code (rtx x) { enum machine_mode mode = GET_MODE (XEXP (x, 0)); @@ -3616,7 +3616,7 @@ aarch64_get_condition_code (rtx x) case UNLE: return AARCH64_LE; case UNGT: return AARCH64_HI; case UNGE: return AARCH64_PL; - default: gcc_unreachable (); + default: return -1; } break; @@ -3633,7 +3633,7 @@ aarch64_get_condition_code (rtx x) case GTU: return AARCH64_HI; case LEU: return AARCH64_LS; case LTU: return AARCH64_CC; - default: gcc_unreachable (); + default: return -1; } break; @@ -3652,7 +3652,7 @@ aarch64_get_condition_code (rtx x) case GTU: return AARCH64_CC; case LEU: return AARCH64_CS; case LTU: return 
AARCH64_HI; - default: gcc_unreachable (); + default: return -1; } break; @@ -3663,7 +3663,7 @@ aarch64_get_condition_code (rtx x) case EQ: return AARCH64_EQ; case GE: return AARCH64_PL; case LT: return AARCH64_MI; - default: gcc_unreachable (); + default: return -1; } break; @@ -3672,12 +3672,12 @@ aarch64_get_condition_code (rtx x) { case NE: return AARCH64_NE; case EQ: return AARCH64_EQ; - default: gcc_unreachable (); + default: return -1; } break; default: - gcc_unreachable (); + return -1; break; } } @@ -3795,39 +3795,48 @@ aarch64_print_operand (FILE *f, rtx x, char code) break; case 'm': - /* Print a condition (eq, ne, etc).
[PATCH][ARM] Disable store_minmaxsi pattern for arm_restrict_it
Hi all, The store_minmaxsi produces a cmp + ite + 2 conditional stores and is thus inappropriate when the ARMv8-A IT block rules are in place. Previously we had disabled it for speed optimisations, but it should be disabled completely when -mrestrict-it is in effect. Ok for trunk and 4.9? Tested arm-none-eabi. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/arm/arm.md (*store_minmaxsi): Disable for arm_restrict_it. commit 1a721e0ea3e204adff36bdfd803a2071354421b2 Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Mon Sep 1 12:11:10 2014 +0100 [ARM] Disable store_minmaxsi for arm_restrict_it diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 766b646..cb6cc81 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -3449,7 +3449,7 @@ (define_insn *store_minmaxsi [(match_operand:SI 1 s_register_operand r) (match_operand:SI 2 s_register_operand r)])) (clobber (reg:CC CC_REGNUM))] - TARGET_32BIT && optimize_function_for_size_p (cfun) + TARGET_32BIT && optimize_function_for_size_p (cfun) && !arm_restrict_it * operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]);
[PATCH][ARM][2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions
Hi all, In continuation of patch [1/2]... We can use the vector forms of the vcvt{a,p,m} instructions to vectorise the l{round, ceil, floor}f functions. Builtins are added and the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION implementation is updated to wire up the vectorised forms of these functions to the midend. Bootstrapped and tested on arm-none-linux-gnueabihf. Ok for trunk? Thanks, Kyrill 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * config/arm/neon.md (neon_vcvtNEON_VCVT:nvrint_variantsu_optabVCVTF:mode v_cmp_result): New pattern. * config/arm/iterators.md (NEON_VCVT): New int iterator. * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. * config/arm/arm.c (arm_builtin_vectorized_function): Handle BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * gcc.target/arm/vect-lceilf_1.c: New test. * gcc.target/arm/vect-lfloorf_1.c: Likewise. 
* gcc.target/arm/vect-lroundf_1.c: Likewise.commit 3854d95bace665f6d9d8c007702b6d26f6fe07c2 Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Fri Aug 22 17:23:20 2014 +0100 [ARM] Vectorise lroundf, lfloorf, lceilf on ARMv8-A diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index ff66c60..c3b8518 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -29945,6 +29945,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) { enum machine_mode in_mode, out_mode; int in_n, out_n; + bool out_unsigned_p = TYPE_UNSIGNED (type_out); if (TREE_CODE (type_out) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE) @@ -29990,6 +29991,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return ARM_FIND_VRINT_VARIANT (vrintz); case BUILT_IN_ROUNDF: return ARM_FIND_VRINT_VARIANT (vrinta); +#undef ARM_CHECK_BUILTIN_MODE_1 +#define ARM_CHECK_BUILTIN_MODE_1(C) \ + (out_mode == SImode out_n == C \ +in_mode == SFmode in_n == C) + +#define ARM_FIND_VCVT_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ + : NULL_TREE)) + +#define ARM_FIND_VCVTU_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ + : NULL_TREE)) + case BUILT_IN_LROUNDF: +return out_unsigned_p + ? ARM_FIND_VCVTU_VARIANT (vcvta) + : ARM_FIND_VCVT_VARIANT (vcvta); + case BUILT_IN_LCEILF: +return out_unsigned_p + ? ARM_FIND_VCVTU_VARIANT (vcvtp) + : ARM_FIND_VCVT_VARIANT (vcvtp); + case BUILT_IN_LFLOORF: +return out_unsigned_p + ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) + : ARM_FIND_VCVT_VARIANT (vcvtm); #undef ARM_CHECK_BUILTIN_MODE #define ARM_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode out_n == C \ @@ -30020,9 +30051,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) } return NULL_TREE; } +#undef ARM_FIND_VCVT_VARIANT +#undef ARM_FIND_VCVTU_VARIANT #undef ARM_CHECK_BUILTIN_MODE #undef ARM_FIND_VRINT_VARIANT + /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ static HOST_WIDE_INT arm_vector_alignment (const_tree type) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index f4531f3..efe5bda 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf), VAR2 (RINT, vrintm, v2sf, v4sf), VAR2 (RINT, vrintz, v2sf, v4sf), VAR2 (RINT, vrintx, v2sf, v4sf), +VAR1 (RINT, vcvtav2sf, v2si), +VAR1 (RINT, vcvtav4sf, v4si), +VAR1 (RINT, vcvtauv2sf, v2si), +VAR1 (RINT, vcvtauv4sf, v4si), +VAR1 (RINT, vcvtpv2sf, v2si), +VAR1 (RINT, vcvtpv4sf, v4si), +VAR1 (RINT, vcvtpuv2sf, v2si), +VAR1 (RINT, vcvtpuv4sf, v4si), +VAR1 (RINT, vcvtmv2sf, v2si), +VAR1 (RINT, vcvtmv4sf, v4si), +VAR1 (RINT, vcvtmuv2sf, v2si), +VAR1 (RINT, vcvtmuv4sf, v4si), VAR1 (VTBL, vtbl1, v8qi), VAR1 (VTBL, vtbl2, v8qi), VAR1 (VTBL, vtbl3, v8qi), diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index f7e0e14..021372a 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -223,6 +223,8 @@ (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM UNSPEC_NVRINTX UNSPEC_NVRINTA
[PATCH][ARM][1/2] Implement lceil, lfloor, lround optabs with new ARMv8-A instructions
Hi all, This patch implements the {lceil, lfloor, lround}si{sf, df}2 optabs in a similar way to fcvt in aarch64. We use the new ARMv8 FP convert with rounding instructions vcvt{a,p,m} for that. Bootstrapped and tested on arm-none-linux-gnueabihf. Ok for trunk? Thanks, Kyrill 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * config/arm/iterators.md (FIXUORS): New code iterator. (VCVT): New int iterator. (su_optab): New code attribute. (su): Likewise. * config/arm/vfp.md (lvrint_patternsu_optabmodesi2): New pattern. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * gcc.target/arm/lceil-vcvt_1.c: New test. * gcc.target/arm/lfloor-vcvt_1.c: Likewise. * gcc.target/arm/lround-vcvt_1.c: Likewise.commit ba3ec05be54d74ee53d287dfa4eb7b5508292e59 Author: Kyrylo Tkachov kyrylo.tkac...@arm.com Date: Thu Aug 21 12:28:00 2014 +0100 [ARM] Implement ARMv8-A vcvt* optabs diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 6fe6eef..f7e0e14 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -194,6 +194,9 @@ (define_code_iterator SE [sign_extend zero_extend]) ;; Right shifts (define_code_iterator rshifts [ashiftrt lshiftrt]) +;; Iterator for integer conversions +(define_code_iterator FIXUORS [fix unsigned_fix]) + ;; Binary operators whose second operand can be shifted. (define_code_iterator shiftable_ops [plus minus ior xor and]) @@ -215,6 +218,8 @@ (define_code_attr arith_shift_insn (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) +(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) + (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) @@ -519,6 +524,13 @@ (define_code_attr optab [(ltu ltu) (geu geu)]) ;; Assembler mnemonics for signedness of widening operations. 
(define_code_attr US [(sign_extend s) (zero_extend u)]) +;; Signedness suffix for float-fixed conversions. Empty for signed +;; conversion. +(define_code_attr su_optab [(fix ) (unsigned_fix u)]) + +;; Sign prefix to use in instruction type suffixes, i.e. s32, u32. +(define_code_attr su [(fix s) (unsigned_fix u)]) + ;; Right shifts (define_code_attr shift [(ashiftrt ashr) (lshiftrt lshr)]) (define_code_attr shifttype [(ashiftrt signed) (lshiftrt unsigned)]) diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 90e001c..a203449 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -1306,6 +1306,18 @@ (define_insn vrint_patternSDF:mode2 (set_attr conds vrint_conds)] ) +;; Implements the lround, lfloor and lceil optabs. +(define_insn lvrint_patternsu_optabmodesi2 + [(set (match_operand:SI 0 register_operand =t) +(FIXUORS:SI (unspec:SDF +[(match_operand:SDF 1 + register_operand F_constraint)] VCVT)))] + TARGET_HARD_FLOAT TARGET_FPU_ARMV8 vfp_double_cond + vcvtvrint_variant%?.su32.V_if_elem\\t%0, %V_reg1 + [(set_attr predicable no) + (set_attr type f_cvtf2i)] +) + ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. ;; The 'smax' and 'smin' RTL standard pattern names do not specify which ;; operand will be returned when both operands are zero (i.e. 
they may not diff --git a/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c new file mode 100644 index 000..bbe4271 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options -O2 -march=armv8-a } */ +/* { dg-add-options arm_v8_vfp } */ + +int +foofloat (float x) +{ + return __builtin_lceilf (x); +} + +/* { dg-final { scan-assembler-times vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+ 1 } } */ + + +int +foodouble (double x) +{ + return __builtin_lceil (x); +} + +/* { dg-final { scan-assembler-times vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+ 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c new file mode 100644 index 000..88671d3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options -O2 -march=armv8-a } */ +/* { dg-add-options arm_v8_vfp } */ + +int +foofloat (float x) +{ + return __builtin_lfloorf (x); +} + +/* { dg-final { scan-assembler-times vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+ 1 } } */ + + +int +foodouble (double x) +{ + return __builtin_lfloor (x); +} + +/* { dg-final { scan-assembler-times vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+ 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c b/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c new file mode 100644 index 000..8b1f6a7 --- /dev/null +++
Re: [C++ Patch] PR 58102 aka DR 1405
On 09/02/2014 11:07 AM, Paolo Carlini wrote: Anyway, what about the below? Certainly works for the tests which we have got. Hmm. This is definitely an improvement, as it allows a subset of "a non-volatile glvalue of literal type that refers to a non-volatile object whose lifetime began within the evaluation of e". But it doesn't cover all of that, and in any case we shouldn't need to explicitly handle that just for types with mutable subobjects. I think perhaps it would be better to remove that hunk as in your initial patch and replace it with a check in constant_value_1 and an explanation in non_const_var_error. Jason
Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions
On 09/02/2014 08:34 AM, Kyrill Tkachov wrote: 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/aarch64/predicates.md (aarch64_comparison_operation): New special predicate. * config/aarch64/aarch64.md (*csinc2mode_insn): Use aarch64_comparison_operation instead of matching an operator. Update operand numbers. (csinc3mode_insn): Likewise. (*csinv3mode_insn): Likewise. (*csneg3mode_insn): Likewise. (ffsmode2): Update gen_csinc3mode_insn callsite. * config/aarch64/aarch64.c (aarch64_get_condition_code): Export. Return -1 instead of aborting on invalid condition codes. (aarch64_print_operand): Update aarch64_get_condition_code callsites to assert that the returned condition code is valid. Looks good to me. Note that you missed the ChangeLog entry for aarch64-protos.h. r~
Re: [RFA:] testsuite: robustify g++.old-deja/g++.eh/badalloc1.C for 64-bit systems
On Sep 2, 2014, at 3:28 AM, Hans-Peter Nilsson hans-peter.nils...@axis.com wrote: In a native x86_64-linux toolchain in which eh-table-registration is done explicitly (i.e. dl_iterate_phdr and PT_GNU_EH_FRAME is *not* assumed, as that eliminates the issue), the memory overhead for exception-initialization goes beyond the 32768 bytes assumed in badalloc1.C and the test fails for reasons not intended by the test. Ok to commit? Ok. If someone with deeper insight into the details wants to chime in, or robustify it some more… the test strikes me as unfortunately brittle. I’d be tempted to make it the maximal size on any system (subject to smaller systems limiting it, cause they are small).
Re: [PATCH][ARM][1/2] Implement lceil, lfloor, lround optabs with new ARMv8-A instructions
On 02/09/14 16:34, Kyrill Tkachov wrote: Hi all, This patch implements the {lceil, lfloor, lround}si{sf, df}2 optabs in a similar way to fcvt in aarch64. We use the new ARMv8 FP convert with rounding instructions vcvt{a,p,m} for that. Bootstrapped and tested on arm-none-linux-gnueabihf. Ok for trunk? Ok . Ramana Thanks, Kyrill 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * config/arm/iterators.md (FIXUORS): New code iterator. (VCVT): New int iterator. (su_optab): New code attribute. (su): Likewise. * config/arm/vfp.md (lvrint_patternsu_optabmodesi2): New pattern. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * gcc.target/arm/lceil-vcvt_1.c: New test. * gcc.target/arm/lfloor-vcvt_1.c: Likewise. * gcc.target/arm/lround-vcvt_1.c: Likewise.
Re: [PATCH][ARM][2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions
On 02/09/14 16:34, Kyrill Tkachov wrote: Hi all, In continuation of patch [1/2]... We can use the vector forms of the vcvt{a,p,m} instructions to vectorise the l{round, ceil, floor}f functions. Builtins are added and the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION implementation is updated to wire up the vectorised forms of these functions to the midend. Bootstrapped and tested on arm-none-linux-gnueabihf. Ok for trunk? Ok - thanks. Ramana Thanks, Kyrill 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * config/arm/neon.md (neon_vcvtNEON_VCVT:nvrint_variantsu_optabVCVTF:mode v_cmp_result): New pattern. * config/arm/iterators.md (NEON_VCVT): New int iterator. * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. * config/arm/arm.c (arm_builtin_vectorized_function): Handle BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com PR target/62275 * gcc.target/arm/vect-lceilf_1.c: New test. * gcc.target/arm/vect-lfloorf_1.c: Likewise. * gcc.target/arm/vect-lroundf_1.c: Likewise.
Re: [PATCH] aarch64: Enable Neon search_line_fast
On 02/09/14 16:28, Richard Henderson wrote: Is it intentional or not that AArch64 does not define __ARM_NEON__? Yes I remember so, __ARM_NEON__ is not ACLE compatible so we haven't defined it for AArch64 - on AArch32 and AArch64 we now have __ARM_NEON defined so that's the macro to be used. Otherwise, here's a better way to fold the test bits. AArch64 of course does not have dN+1 overlap the high part of the qM register, like AArch32, so the current l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); implies extra register moves. But on the good side, the armv8 ADDV instruction allows two instructions to be removed from this fast path. Cool. When built for 32-bit, the new form results in the same instruction count; we simply keep using q registers instead of d registers for two more insns. Given that there are currently ifdefs involved, it would certainly be possible to keep the 32-bit path unchanged, if that's thought to be valuable. The ADDV instruction isn't available on the AArch32 side IIRC. Given that situation there is no intrinsic for ADDV on the AArch32 side which is why this doesn't exist in the AArch32 version of arm_neon.h :( I'll need to take a look at the new code generated for AArch32 and will probably be able to get back tomorrow as I'll disappear shortly. I did wonder if the armv8 stuff was supposed to be included in the AArch32 arm_neon.h? Is it just an oversight that it's missing? The ARMv8 stuff is included for arm_neon.h - I believe we've implemented everything that's ARMv8 specific in arm_neon.h for AArch32 . Anything missing would be an oversight. regards Ramana r~ * lex.c (search_line_fast) [__ARM_NEON]: Use __FOO not __FOO__ to detect neon support. Fold the comparison using ADDV when available. 
diff --git a/libcpp/lex.c b/libcpp/lex.c index 5366dad..6d1823e 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -638,7 +638,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) } } -#elif defined (__ARM_NEON__) +#elif defined (__ARM_NEON) #include arm_neon.h static const uchar * @@ -649,6 +649,7 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) const uint8x16_t repl_bs = vdupq_n_u8 ('\\'); const uint8x16_t repl_qm = vdupq_n_u8 ('?'); const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL); + const int16x8_t shift = { 0, 0, 0, 0, 8, 8, 8, 8 }; unsigned int misalign, found, mask; const uint8_t *p; @@ -670,10 +671,8 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) do { - uint8x8_t l; - uint16x4_t m; - uint32x2_t n; uint8x16_t t, u, v, w; + uint16x8_t l; p += 16; data = vld1q_u8 (p); @@ -685,12 +684,24 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); t = vandq_u8 (vorrq_u8 (v, w), xmask); - l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t)); - m = vpaddl_u8 (l); - n = vpaddl_u16 (m); - - found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n, - vshr_n_u64 ((uint64x1_t) n, 24)), 0); + + l = vpaddlq_u8 (t); + l = vshlq_u16 (l, shift); + + /* ??? Ideally, this would be if (__ARM_ARCH = 8) since the ADDV insn +reduces the instruction count by two. But vaddvq is not present in +the arm32 arm_neon.h, nor does AArch64 define __ARM_ARCH. */ +#ifdef __aarch64__ + found = vaddvq_u16 (l); +#else + { + uint32x4_t m = vpaddlq_u16 (l); + uint64x2_t n = vpaddlq_u32 (m); + uint64x1_t o = vget_low_u64 (n) + vget_high_u64 (n); + found = vget_lane_u32 ((uint32x2_t)o, 0); + } +#endif + found = mask; } while (!found);
[PATCH] Enable -Wlogical-not-parentheses by -Wall
Now that PR61271 and PR62270 have been fixed, we can enable -Wlogical-not-parentheses by -Wall. I think this warning proved useful. Bootstrapped/regtested on x86_64-linux and ppc64-linux, ok for trunk? 2014-08-26 Marek Polacek pola...@redhat.com * doc/invoke.texi: Document that -Wlogical-not-parentheses is enabled by -Wall. c-family/ * c.opt (Wlogical-not-parentheses): Enable by -Wall. diff --git gcc/c-family/c.opt gcc/c-family/c.opt index 210a099..643f256 100644 --- gcc/c-family/c.opt +++ gcc/c-family/c.opt @@ -519,7 +519,7 @@ C ObjC C++ ObjC++ Var(warn_logical_op) Init(0) Warning Warn when a logical operator is suspiciously always evaluating to true or false Wlogical-not-parentheses -C ObjC C++ ObjC++ Var(warn_logical_not_paren) Warning +C ObjC C++ ObjC++ Var(warn_logical_not_paren) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn when logical not is used on the left hand side operand of a comparison Wlong-long diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi index d15d4a9..9864708 100644 --- gcc/doc/invoke.texi +++ gcc/doc/invoke.texi @@ -4800,6 +4800,8 @@ parentheses: if ((!a) 1) @{ @dots{} @} @end smallexample +This warning is enabled by @option{-Wall}. + @item -Waggregate-return @opindex Waggregate-return @opindex Wno-aggregate-return Marek
Re: [PATCH][AArch64] Use CC_Z and CC_NZ with csinc and similar instructions
On 02/09/14 16:47, Richard Henderson wrote: On 09/02/2014 08:34 AM, Kyrill Tkachov wrote: 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/aarch64/predicates.md (aarch64_comparison_operation): New special predicate. * config/aarch64/aarch64.md (*csinc2mode_insn): Use aarch64_comparison_operation instead of matching an operator. Update operand numbers. (csinc3mode_insn): Likewise. (*csinv3mode_insn): Likewise. (*csneg3mode_insn): Likewise. (ffsmode2): Update gen_csinc3mode_insn callsite. * config/aarch64/aarch64.c (aarch64_get_condition_code): Export. Return -1 instead of aborting on invalid condition codes. (aarch64_print_operand): Update aarch64_get_condition_code callsites to assert that the returned condition code is valid. Looks good to me. Note that you missed the ChangeLog entry for aarch64-protos.h. Thanks, it seems I had written the export part in the aarch64.c entry. Committed as r214824 with ChangeLog: 2014-09-02 Kyrylo Tkachov kyrylo.tkac...@arm.com * config/aarch64/predicates.md (aarch64_comparison_operation): New special predicate. * config/aarch64/aarch64.md (*csinc2mode_insn): Use aarch64_comparison_operation instead of matching an operator. Update operand numbers. (csinc3mode_insn): Likewise. (*csinv3mode_insn): Likewise. (*csneg3mode_insn): Likewise. (ffsmode2): Update gen_csinc3mode_insn callsite. * config/aarch64/aarch64.c (aarch64_get_condition_code): Return -1 instead of aborting on invalid condition codes. (aarch64_print_operand): Update aarch64_get_condition_code callsites to assert that the returned condition code is valid. * config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export. Kyrill r~
Re: [PING][PATCH] Fix environment variables restoring in GCC testsuite.
When I ran the Asan tests on an Asan-bootstrapped GCC, some of them failed with memory leaks in GCC, even if Lsan is disabled. This is caused by slightly wrong logic in the saving/restoring env variables functionality in gcc-dg.exp (some tests override ASAN_OPTIONS and this env variable isn't restored correctly). Ok to commit? Ok. I’ll note there are 22 other places like this.
Re: [PATCH] aarch64: Enable Neon search_line_fast
On 09/02/2014 08:51 AM, Ramana Radhakrishnan wrote: The ADDV instruction isn't available on the AArch32 side IIRC. Given that situation there is no intrinsic for ADDV on the AArch32 side which is why this doesn't exist in the AArch32 version of arm_neon.h :( Whoops, yes indeed. I clearly mis-read the spec. r~
Re: [PATCH AArch64 2/2] Replace temporary inline assembler for vget_high
On 12 August 2014 11:12, Alan Lawrence alan.lawre...@arm.com wrote: This patch replaces the current inline assembler for the vget_high intrinsics in arm_neon.h with a sequence of other calls, in a similar fashion to vget_low. Unlike the assembler, these are all transparent to the front-end, so should enable better optimization through the mid-end. Tested check-gcc and check-g++ and aarch64-none-elf and aarch64_be-none-elf (including new tests in previous patch!). I think we are still waiting on ChangeLogs for this and the related patch? /Marcus
Re: [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.
On 19 August 2014 18:02, Alan Lawrence alan.lawre...@arm.com wrote: gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_rbitmode): New pattern. * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin. * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8): Replace temporary asm with call to builtin. (vrbit_p8, vrbitq_p8): New functions. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vrbit_1.c: New test. OK /Marcus
Re: [PATCH][AArch64] Remove varargs from aarch64_simd_expand_args
On 20 August 2014 10:20, Alan Lawrence alan.lawre...@arm.com wrote: gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Replace varargs with pointer parameter. (aarch64_simd_expand_builtin): pass pointer into previous. OK /Marcus
Re: [PATCH][AArch64] One-liner: fix type of an add in SIMD registers
On 20 August 2014 10:25, Alan Lawrence alan.lawre...@arm.com wrote: The SIMD-register variant is miscategorized as alu_reg despite not using any ALU registers, and should be neon_add for e.g. scheduling. Tested with check-gcc and check-g++ on aarch64-none-elf and aarch64_be-none-elf. gcc/ChangeLog: * config/aarch64/aarch64.md (adddi3_aarch64): set type to neon_add. OK and back port please. /Marcus
Re: [PATCH][AArch64] Tidy: remove unused qualifier_const_pointer
On 20 August 2014 10:31, Alan Lawrence alan.lawre...@arm.com wrote: The only reference is in a comment. gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Remove qualifier_const_pointer, update comment. OK /M
Re: [PATCH] Force rtl templates to be inlined
Or we simply should make -finline work at -O0 (I suppose it might already work?) and use it. Yes that's probably better. There are more hot inlines in the stage 1 profile (like wi::storage_ref or vec::length) I suspect with the ongoing C++'ification that will get worse. -Andi -- a...@linux.intel.com -- Speaking for myself only
Re: [C PATCH] Backport a fix for PR62294 to 4.9
On Tue, 2 Sep 2014, Marek Polacek wrote: PR62294 reports that 4.9 does not emit an incompatible pointer type warning in certain scenario. I unknowingly broke this in r207335, and then fixed it in r210980, which is a follow-up to the former. But 4.9 doesn't have the latter. This patch is basically a backport of r210980, only without the traditional conversion stuff. Bootstrapped/regtested on x86_64-linux, ok for 4.9? OK with a testcase specifically for the regression case added on trunk and 4.9 if there isn't one already. -- Joseph S. Myers jos...@codesourcery.com
Re: [Patch, Fortran] PRs 61881/61888 - Fix issues with SIZEOF, CLASS(*) and assumed-rank
Hi Tobias! On Sat, 26 Jul 2014 01:47:02 +0200, Tobias Burnus bur...@net-b.de wrote: 2014-07-26 Tobias Burnus bur...@net-b.de * check.c (gfc_check_sizeof): Permit for assumed type if and only if it has an array descriptor. * intrinsic.c (do_ts29113_check): Permit SIZEOF. (add_functions): SIZEOF is an Inquiry function. * intrinsic.texi (SIZEOF): Add note that only contiguous arrays are permitted. * trans-expr.c (gfc_conv_intrinsic_to_class): Handle assumed rank. * trans-intrinsic.c (gfc_conv_intrinsic_sizeof): Handle assumed type + array descriptor, CLASS and assumed rank. (gfc_conv_intrinsic_storage_size): Handle class arrays. 2014-07-26 Tobias Burnus bur...@net-b.de * gfortran.dg/sizeof_2.f90: Change dg-error. * gfortran.dg/sizeof_4.f90: New. * gfortran.dg/storage_size_1.f08: Correct expected value. I noticed that the sizeof_4.f90 test case has not been checked in, probably just forgot to svn add the file? Searching for it in my emails, I also noticed that a year ago a similar patch has been posted in http://news.gmane.org/find-root.php?message_id=%3CCAKwh3qi633jU-ojPKqRa_16DKWhXn9L2N0Wr4trAG9p1dJ-sXg%40mail.gmail.com%3E, but that is now probably obsolete. Grüße, Thomas pgpidlSQNRkyl.pgp Description: PGP signature
Re: [gomp4] Add tables generation
Hi Bernd, This patch allows compiling binaries with offloading without passing the -flto option, and w/o performing link-time optimizations of the host code. How it works: 1. If there is at least one function or global variable to offload, gcc sets flag_generate_lto. This enables writing the bytecode produced by ipa_write_summaries into .gnu.target_lto_* sections (.gnu.lto_* sections are not created). Also this flag emits LTO marker (__gnu_lto_v1). 2. This step is not changed: collect2 scans object files for the LTO marker and fills the list of LTO objects. If the list is not empty, it runs lto-wrapper to perform link-time recompilation. 3. lto-wrapper compiles images for targets. And if -flto option is absent (lto_mode == LTO_MODE_NONE), then it just returns the list of input objects without recompilation. One known issue -- the final binary contains temporary .gnu.target_lto_* sections. This can be solved by adding the following linker script to the list of input files: SECTIONS { /DISCARD/ : { *(.gnu.target_lto_*) } } But I'm not sure what is the best way to do this automatically. Bootstrap and make check passed, tests with '#pragma omp target' without -flto passed. What do you think? Thanks, -- Ilya --- gcc/cgraphunit.c | 39 +++ gcc/lto-wrapper.c | 68 +-- gcc/omp-low.c | 6 + gcc/passes.c | 2 +- 4 files changed, 73 insertions(+), 42 deletions(-) diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index f0c9f5c..32b35f3 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2040,13 +2040,26 @@ output_in_order (void) free (nodes); } -/* Collect all global variables with omp declare target attribute into - OFFLOAD_VARS. It will be streamed out in ipa_write_summaries. */ +/* Check whether there is at least one function or global variable to offload. + Also collect all such global variables into OFFLOAD_VARS, the functions were + already collected in omp-low.c. They will be streamed out in + ipa_write_summaries. 
*/ -static void -init_offload_var_table (void) +static bool +initialize_offload (void) { + bool have_offload = false; + struct cgraph_node *node; struct varpool_node *vnode; + + FOR_EACH_DEFINED_FUNCTION (node) +if (lookup_attribute (omp declare target, DECL_ATTRIBUTES (node-decl))) + { + have_offload = true; + break; + } + FOR_EACH_DEFINED_VARIABLE (vnode) { if (!lookup_attribute (omp declare target, @@ -2054,13 +2067,17 @@ init_offload_var_table (void) || TREE_CODE (vnode-decl) != VAR_DECL || DECL_SIZE (vnode-decl) == 0) continue; + have_offload = true; vec_safe_push (offload_vars, vnode-decl); } + + return have_offload; } static void ipa_passes (void) { + bool have_offload = false; gcc::pass_manager *passes = g-get_passes (); set_cfun (NULL); @@ -2068,6 +2085,14 @@ ipa_passes (void) gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); + if (!in_lto_p (flag_openacc || flag_openmp)) +{ + have_offload = initialize_offload (); + /* OpenACC / OpenMP offloading requires LTO infrastructure. */ + if (have_offload) + flag_generate_lto = 1; +} + invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL); if (!in_lto_p) @@ -2108,11 +2133,7 @@ ipa_passes (void) if (!in_lto_p) { - init_offload_var_table (); - - if ((flag_openacc || flag_openmp) - !(vec_safe_is_empty (offload_funcs) - vec_safe_is_empty (offload_vars))) + if (have_offload) { section_name_prefix = OMP_SECTION_NAME_PREFIX; ipa_write_summaries (true); diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index 80d10f3..e9245f1 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -668,6 +668,11 @@ run_gcc (unsigned argc, char *argv[]) close (fd); continue; } + /* We may choose not to write out this .opts section in the future. In +that case we'll have to use something else to look for. */ + if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX . opts, + offset, length, errmsg, err)) + have_offload = true; if (!simple_object_find_section (sobj, LTO_SECTION_NAME_PREFIX . 
opts, offset, length, errmsg, err)) { @@ -675,11 +680,6 @@ run_gcc (unsigned argc, char *argv[]) close (fd); continue; } - /* We may choose not to write out this .opts section in the future. In -that case we'll have to use something else to look for. */ - if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX . opts, - offset, length, errmsg, err)) - have_offload = true; lseek (fd, file_offset + offset, SEEK_SET); data = (char *)xmalloc (length);
[Patch, Fortran] Component declarations overwrite types of Cray Pointee variables
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62174 The typespecs for Cray pointees are overwritten by the typespecs of components with the same name which are declared later. This problem was introduced with Cray pointer support in 4.1.0 and is confirmed up through trunk (5.0). Here is a proposed patch from 4.8.3 (test case comments/ChangeLog descriptions are updated from the submission on bugzilla). The test case demonstrates the problem. FYI, I am currently working with my employer so any future changes I have can comply with GNU's legal requirements. Also my mail client replaces tabs with spaces so I'm sorry for any whitespace issues. 2014-09-02 Fritz Reese reese-fr...@zai.com PR fortran/62174 * decl.c (variable_decl): Don't overwrite typespecs of Cray pointees when matching a component declaration. diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 4048ac9..7b3c59a 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -1904,8 +1904,9 @@ variable_decl (int elem) } /* If this symbol has already shown up in a Cray Pointer declaration, + and this is not a component declaration, then we want to set the type bail out. */ - if (gfc_option.flag_cray_pointer) + if (gfc_option.flag_cray_pointer gfc_current_state () != COMP_DERIVED) { gfc_find_symbol (name, gfc_current_ns, 1, sym); if (sym != NULL sym-attr.cray_pointee) 2014-09-02 Fritz Reese reese-fr...@zai.com PR fortran/62174 * gcc/testsuite/gfortran.dg/cray_pointers_11.f90: New. diff --git a/gcc/testsuite/gfortran.dg/cray_pointers_11.f90 b/gcc/testsuite/gfortran.dg/cray_pointers_11.f90 new file mode 100644 index 000..038e4dc --- /dev/null +++ b/gcc/testsuite/gfortran.dg/cray_pointers_11.f90 @@ -0,0 +1,22 @@ +! { dg-do compile } +! { dg-options -fcray-pointer } +! +! PR fortran/62174 +! Component declarations within derived types would overwrite the typespec of +! variables with the same name who were Cray pointees. 
+implicit none + +type t1 + integer i +end type t1 +type(t1) x + +pointer (x_ptr, x) + +type t2 + real x ! should not overwrite x's type +end type t2 + +x%i = 0 ! should see no error here + +end --- Fritz Reese
Re: [PATCH] Force rtl templates to be inlined
I suspect the bulk of them currently are coming from the safe_as_a rtx_insn * calls within NEXT_INSN and PREV_INSN; do you happen to have information handy on that? Yes that's right: - 1.03% lto1[.] bool is_a_helperrtx_insn*::testrtx_def(rtx_def*) ▒ - bool is_a_helperrtx_insn*::testrtx_def(rtx_def*) ▒ - 92.20% bool is_artx_insn*, rtx_def(rtx_def*) ▒ - 98.53% rtx_insn* safe_as_artx_insn*, rtx_def(rtx_def*) ▒ - 73.28% NEXT_INSN(rtx_insn const*) ▒
RFA: Document first operand to RTX_AUTOINC
As Jeff suggested here: https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00390.html this patch documents that the first operand to an RTX_AUTOINC is the automodified register. Tested on x86_64-linux-gnu. OK to install? Thanks, Richard gcc/ * doc/rtl.texi (RTX_AUTOINC): Document that the first operand is the automodified register. Index: gcc/doc/rtl.texi === --- gcc/doc/rtl.texi2014-05-06 18:38:47.982200623 +0100 +++ gcc/doc/rtl.texi2014-08-30 16:04:50.870456416 +0100 @@ -193,7 +193,8 @@ An RTX code for something that matches i @item RTX_AUTOINC An RTX code for an auto-increment addressing mode, such as -@code{POST_INC}. +@code{POST_INC}. @samp{XEXP (@var{x}, 0)} gives the auto-modified +register. @item RTX_EXTRA All other RTX codes. This category includes the remaining codes used
Re: [PATCH] C++ thunk section names
Ping. On Wed, Aug 6, 2014 at 2:42 PM, Sriraman Tallam tmsri...@google.com wrote: Hi, Just wondering if you got a chance to look at this? Sri On Tue, Jul 8, 2014 at 10:45 AM, Sriraman Tallam tmsri...@google.com wrote: On Tue, Jul 8, 2014 at 10:38 AM, Sriraman Tallam tmsri...@google.com wrote: On Mon, Jul 7, 2014 at 11:48 AM, Jan Hubicka hubi...@ucw.cz wrote: Hello, I apologize for taking so long to get into this patch. I had a busy time (wedding and teaching), should be back in regular schedule now. Sri, can you provide examples to show why putting thunks into the same section as the target function when function reorder is on can be bad ? C++ ABI specify that they are in the same section, but I can't think of the case where this would break. Hmm, I suppose it is the TARGET_USE_LOCAL_THUNK_ALIAS_P code that breaks - you end up with code in two sections where one is accessing local comdat of the other. I would also like to see testcase that breaks and is fixed by your patch. I would expect that here, by not copying the section name, you actually make things worse. Here is an example where the thunk and the original function get placed in different sections. class base_class_1 { public: virtual void vfn () {} }; class base_class_2 { public: virtual void vfn () {} }; void foo(); class need_thunk_class : public base_class_1, public base_class_2 { public: virtual void vfn () { for (int i = 0; i < 10; ++i) foo(); } }; int main (int argc, char *argv[]) { base_class_1 *bc1 = new need_thunk_class (); bc1->vfn(); return 0; } int glob = 0; __attribute__((noinline)) void foo() { glob++; } I am making the function that needs thunk hot. 
Now, $ g++ thunkex.cc -O2 -fno-reorder-blocks-and-partition -fprofile-generate -ffunction-sections $ a.out $ g++ thunkex.cc -O2 -fno-reorder-blocks-and-partition -fprofile-use -ffunction-sections -c $ objdump -d thunkex.o Disassembly of section .text.hot._ZN16need_thunk_class3vfnEv: _ZN16need_thunk_class3vfnEv: 0: 53 push %rbx 1: bb a0 86 01 00 mov$0x186a0,%ebx ... Disassembly of section .text._ZN16need_thunk_class3vfnEv: _ZThn8_N16need_thunk_class3vfnEv: 0: 48 83 ef 08 sub$0x8,%rdi When the original function gets moved to .text.hot, the thunk does not. It is not always the case that the thunk should either. I forgot to add that this becomes confusing because, in this case, the thunk is the only function sitting in a section whose name does not correspond to its assembler name. If we are not going to have thunk section names the same as the original function when profiles are available and -freorder-functions is used, we as well change the name of the thunk's section to correspond to its assembler name. That was the intention of the patch. Thanks Sri Thanks Sri I think we need to deal with this later; use_tunk is done long before profiling is read and before we decide whether code is hot/cold. I suppose the function reordering code may need to always walk whole comdat group and ensure that sections are same? I.e. pick the highest profile of a function in the group, resolve unique section on it and then copy section names? I had verifier checking that section names within one comdat groups are same, perhaps it was part of the reverted patch for AIX. I will try to get that one back in now. Jan Thanks, David On Thu, Jun 26, 2014 at 10:29 AM, Sriraman Tallam tmsri...@google.com wrote: Hi Honza, Could you review this patch when you find time? Thanks Sri On Tue, Jun 17, 2014 at 10:42 AM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Mon, Jun 9, 2014 at 3:54 PM, Sriraman Tallam tmsri...@google.com wrote: Ping. 
On Mon, May 19, 2014 at 11:25 AM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Thu, Apr 17, 2014 at 10:41 AM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Wed, Feb 5, 2014 at 4:31 PM, Sriraman Tallam tmsri...@google.com wrote: Hi, I would like this patch reviewed and considered for commit when Stage 1 is active again. Patch Description: A C++ thunk's section name is set to be the same as the original function's section name for which the thunk was created in order to place the two together. This is done in cp/method.c in function use_thunk. However, with function reordering turned on, the original function's section name can change to something like .text.hot.orginal or .text.unlikely.original in function default_function_section in varasm.c based on the node count of that function. The thunk function's section name is not updated to be the same as the original here and also is not always correct to do it as the original function can be hotter than the thunk. I have created
Re: [PATCH x86_64] Optimize access to globals in -fpie -pie builds with copy relocations
Ping. On Fri, Jul 11, 2014 at 10:42 AM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Thu, Jun 26, 2014 at 10:54 AM, Sriraman Tallam tmsri...@google.com wrote: Hi Uros, Could you please review this patch? Thanks Sri On Fri, Jun 20, 2014 at 5:17 PM, Sriraman Tallam tmsri...@google.com wrote: Patch Updated. Sri On Mon, Jun 9, 2014 at 3:55 PM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Mon, May 19, 2014 at 11:11 AM, Sriraman Tallam tmsri...@google.com wrote: Ping. On Thu, May 15, 2014 at 11:34 AM, Sriraman Tallam tmsri...@google.com wrote: Optimize access to globals with -fpie, x86_64 only: Currently, with -fPIE/-fpie, GCC accesses globals that are extern to the module using the GOT. This is two instructions, one to get the address of the global from the GOT and the other to get the value. If it turns out that the global gets defined in the executable at link-time, it still needs to go through the GOT as it is too late then to generate a direct access. Examples: foo.cc -- int a_glob; int main () { return a_glob; // defined in this file } With -O2 -fpie -pie, the generated code directly accesses the global via PC-relative insn: 5e0 main: mov0x165a(%rip),%eax# 1c40 a_glob foo.cc -- extern int a_glob; int main () { return a_glob; // defined in this file } With -O2 -fpie -pie, the generated code accesses global via GOT using two memory loads: 6f0 main: mov0x1609(%rip),%rax # 1d00 _DYNAMIC+0x230 mov(%rax),%eax This is true even if in the latter case the global was defined in the executable through a different file. Some experiments on google benchmarks shows that the extra memory loads affects performance by 1% to 5%. Solution - Copy Relocations: When the linker supports copy relocations, GCC can always assume that the global will be defined in the executable. For globals that are truly extern (come from shared objects), the linker will create copy relocations and have them defined in the executable. 
Result is that no global access needs to go through the GOT and hence improves performance. This patch to the gold linker : https://sourceware.org/ml/binutils/2014-05/msg00092.html submitted recently allows gold to generate copy relocations for -pie mode when necessary. I have added option -mld-pie-copyrelocs which when combined with -fpie would do this. Note that the BFD linker does not support pie copyrelocs yet and this option cannot be used there. Please review. ChangeLog: * config/i386/i386.opt (mld-pie-copyrelocs): New option. * config/i386/i386.c (legitimate_pic_address_disp_p): Check if this address is still legitimate in the presence of copy relocations and -fpie. * testsuite/gcc.target/i386/ld-pie-copyrelocs-1.c: New test. * testsuite/gcc.target/i386/ld-pie-copyrelocs-2.c: New test. Patch attached. Thanks Sri
RFA: Merge definitions of get_some_local_dynamic_name
Several targets define a function like i386's get_some_local_dynamic_name. The function looks through the current output function and returns the first (arbitrary) local-dynamic symbol that it finds. The result can be used in a call to __tls_get_addr, since all local-dynamic symbols have the same base. This patch replaces the various target functions with a single generic one. The only difference between the implementations was that s390 checked for constant pool references while the others didn't need to (because they don't allow TLS symbols to be forced into the pool). Checking for constant pool references is unnecessary but harmless for the other ports. Also, the walk is needed only once per TLS-referencing output function, so it's hardly critical in terms of compile time. All uses of this function are in final. In general it wouldn't be safe to call the function earlier than that, since the symbol reference could in principle be deleted by any rtl pass. I've therefore cached it in a variable local to final rather than in cfun (which is where the ports used to cache it). Also, i386 was robust against uses of % in inline asm. The patch makes sure the other ports are too. Using % in inline asm would often be a mistake, but it should at least trigger a proper error rather than an ICE. Tested on x86_64-linux-gnu. Also tested by building cross compilers before and after the change on: alpha-linux-gnu powerpc64-linux-gnu s390x-linux-gnu sparc64-linux-gnu OK to install? Thanks, Richard gcc/ * output.h (get_some_local_dynamic_name): Declare. * final.c (some_local_dynamic_name): New variable. (get_some_local_dynamic_name): New function. (final_end_function): Clear some_local_dynamic_name. * config/alpha/alpha.c (machine_function): Remove some_ld_name. (get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete. (print_operand): Report an error if '%' is used inappropriately. * config/i386/i386.c (get_some_local_dynamic_name): Delete. 
(get_some_local_dynamic_name_1): Delete. * config/rs6000/rs6000.c (machine_function): Remove some_ld_name. (rs6000_get_some_local_dynamic_name): Delete. (rs6000_get_some_local_dynamic_name_1): Delete. (print_operand): Report an error if '%' is used inappropriately. * config/s390/s390.c (machine_function): Remove some_ld_name. (get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete. (print_operand): Assert that get_some_local_dynamic_name is nonnull. * config/sparc/sparc.c: Include rtl-iter.h. (machine_function): Remove some_ld_name. (sparc_print_operand): Report an error if '%' is used inappropriately. (get_some_local_dynamic_name, get_some_local_dynamic_name_1): Delete. Index: gcc/output.h === --- gcc/output.h2014-08-31 21:05:04.701330252 +0100 +++ gcc/output.h2014-09-02 19:02:59.820482510 +0100 @@ -52,6 +52,8 @@ extern int get_attr_min_length (rtx); any branches of variable length if possible. */ extern void shorten_branches (rtx_insn *); +const char *get_some_local_dynamic_name (); + /* Output assembler code for the start of a function, and initialize some of the variables in this file for the new function. The label for the function and associated Index: gcc/final.c === --- gcc/final.c 2014-08-31 21:05:04.701330252 +0100 +++ gcc/final.c 2014-09-02 19:17:08.573876805 +0100 @@ -1719,6 +1719,38 @@ reemit_insn_block_notes (void) reorder_blocks (); } +static const char *some_local_dynamic_name; + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in local-dynamic base patterns. + Return null if there are no local-dynamic references. 
*/ + +const char * +get_some_local_dynamic_name () +{ + subrtx_iterator::array_type array; + rtx_insn *insn; + + if (some_local_dynamic_name) +return some_local_dynamic_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) +if (NONDEBUG_INSN_P (insn)) + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) + { + const_rtx x = *iter; + if (GET_CODE (x) == SYMBOL_REF) + { + if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + return some_local_dynamic_name = XSTR (x, 0); + if (CONSTANT_POOL_ADDRESS_P (x)) + iter.substitute (get_pool_constant (x)); + } + } + + return 0; +} + /* Output assembler code for the start of a function, and initialize some of the variables in this file for the new function. The label for the function and associated @@ -1904,6 +1936,8 @@ final_end_function (void) if (!dwarf2_debug_info_emitted_p (current_function_decl) dwarf2out_do_frame ())
Re: [4.9] PR 62146
It turns out that the REG_EQUAL note is removed on a hoisted instruction (relevant code is in dead_or_predicable in ifcvt.c) if the source of the move instruction is not a function invariant. In this case, the source is a function invariant (constant) and so that doesn't kick in. I don't understand why this exemption for function invariant is there and the original thread in https://gcc.gnu.org/ml/gcc/2005-05/msg01710.html doesn't explain either. Should I just remove the REG_EQUAL notes of all hoisted instructions or are there cases where it is safe to leave the note? Thanks, Easwaran On Fri, Aug 29, 2014 at 1:06 PM, Jeff Law l...@redhat.com wrote: On 08/25/14 16:42, Easwaran Raman wrote: This patch deletes REG_EQUAL note when a src register is replaced by a constant in an assignment. This is to prevent spurious equivalences between the constant and the expression in the REG_EQUAL note. In the bug reported in PR 62146, an assignment in one branch (which is actually dead) of an IF statement has a REG_EQUAL note equating a register with an expression. Conditional copy propagation replaces the register with 0. The instruction is hoisted above the branch subsequently and then the value 0 is equated with the expression in the REG_EQUAL. Is this ok for 4.9 branch if all tests pass? This patch looks applicable to trunk as well, but I don't have a test case to reproduce the issue in trunk. Something doesn't feel right with this patch. It seems to me the real problem is when we hoist the insn with the note. If the equivalence implied by the note is no longer valid at the insn's new location, then the note needs to be removed. Now determining if the note is no longer valid at the new location may prove difficult ;-) You'd probably have to know why the note was created, how it was changed, etc. So I suspect the right thing to do is just remove REG_EQUAL notes on any insns we hoist in this manner. Jeff
Re: [FORTRAN PATCH] Two -Wlogical-not-parentheses fixes (PR fortran/62270)
Am 02.09.2014 17:32, schrieb Tobias Burnus: Marek Polacek wrote: This patch fixes the last two spots where -Wlogical-not-parentheses warns. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62270#c3 if you want more info about the changes. Bootstrapped/regtested on x86_64-linux, ok for trunk? Looks good to me. Thanks for the patch! As this commit fixes obvious errors for not-so-obvious cases, what about a backport? Thomas
Re: [PATCH x86_64] Optimize access to globals in -fpie -pie builds with copy relocations
On 06/20/2014 05:17 PM, Sriraman Tallam wrote: Index: config/i386/i386.c === --- config/i386/i386.c (revision 211826) +++ config/i386/i386.c (working copy) @@ -12691,7 +12691,9 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && SYMBOL_REF_LOCAL_P (op0) + && (SYMBOL_REF_LOCAL_P (op0) + || (TARGET_64BIT && ix86_copyrelocs && flag_pie + && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; break; This is the wrong place to patch. You ought to be adjusting SYMBOL_REF_LOCAL_P, by providing a modified TARGET_BINDS_LOCAL_P. Note in particular that I believe that you are doing the wrong thing with weak and COMMON symbols, in that you probably ought not force a copy reloc there. Note the complexity of default_binds_local_p_1, and the fact that all you really want to modify is /* If PIC, then assume that any global name can be overridden by symbols resolved from other modules. */ else if (shlib) local_p = false; near the bottom of that function. r~
Re: Ping^2 - RE: [PATCH] Add target hook to override DWARF2 frame register size
On 09/02/2014 01:59 AM, Matthew Fortune wrote: gcc/ * target.def (TARGET_DWARF_FRAME_REG_MODE): New target hook. * targhooks.c (default_dwarf_frame_reg_mode): New function. * targhooks.h (default_dwarf_frame_reg_mode): New prototype. * doc/tm.texi.in (TARGET_DWARF_FRAME_REG_MODE): Document. * doc/tm.texi: Regenerate. * dwarf2cfi.c (expand_builtin_init_dwarf_reg_sizes): Abstract mode selection logic to default_dwarf_frame_reg_mode. Ok. r~
[PATCH] PR61889
This patch makes the build of gcov-tool configurable. It checks if ftw.h is available. For mingw build, it provides ftw functionality by using FindFirstFile/FindNextFile/FindClose API. Tested with and without --disable-gcov-tool. Thanks, -Rong 2014-09-02 Rong Xu x...@google.com * gcc/Makefile.in: Make the build gcov-tool configurable. * gcc/configure.ac: Ditto. * gcc/configure: Ditto. * gcc/config.in: Ditto. * gcc/gcov-tool.c (unlink_gcda_file): Support win32 build. (unlink_profile_dir): Ditto. * libgcc/libgcov-util.c (read_gcda_file): Ditto. (read_file_handler): Ditto. (ftw_read_file): Ditto. (myftw): Ditto. (gcov_read_profile_dir): Ditto. (gcov_profile_normalize): Ditto. Index: gcc/Makefile.in === --- gcc/Makefile.in (revision 214831) +++ gcc/Makefile.in (working copy) @@ -123,9 +123,13 @@ SUBDIRS =@subdirs@ build # Selection of languages to be made. CONFIG_LANGUAGES = @all_selected_languages@ -LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) gcov-tool$(exeext) \ -$(CONFIG_LANGUAGES) +LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) $(CONFIG_LANGUAGES) +disable_gcov_tool = @disable_gcov_tool@ +ifneq ($(disable_gcov_tool),yes) +LANGUAGES += gcov-tool$(exeext) +endif + # Default values for variables overridden in Makefile fragments. # CFLAGS is for the user to override to, e.g., do a cross build with -O2. # TCFLAGS is used for compilations with the GCC just built. Index: gcc/configure.ac === --- gcc/configure.ac(revision 214831) +++ gcc/configure.ac(working copy) @@ -5650,6 +5650,26 @@ if test ${ENABLE_LIBQUADMATH_SUPPORT} != no ; fi +# Check if gcov-tool can be built. 
+AC_ARG_ENABLE(gcov-tool, +[AS_HELP_STRING([--disable-gcov-tool], +[disable the build of gcov-tool])]) +if test x$enable_gcov_tool = xno; then + disable_gcov_tool=yes +else + AC_CHECK_HEADERS(ftw.h, [disable_gcov_tool=no], + [case $host_os in + win32 | cygwin* | mingw32*) +disable_gcov_tool=no +;; + *) +disable_gcov_tool=yes +;; +esac]) +fi +AC_SUBST(disable_gcov_tool) + + # Specify what hash style to use by default. AC_ARG_WITH([linker-hash-style], [AC_HELP_STRING([--with-linker-hash-style={sysv,gnu,both}], Index: gcc/configure === --- gcc/configure (revision 214831) +++ gcc/configure (working copy) @@ -600,6 +600,7 @@ ac_includes_default=\ ac_subst_vars='LTLIBOBJS LIBOBJS +disable_gcov_tool PICFLAG enable_host_shared enable_plugin @@ -932,6 +933,7 @@ enable_version_specific_runtime_libs enable_plugin enable_host_shared enable_libquadmath_support +enable_gcov_tool with_linker_hash_style ' ac_precious_vars='build_alias @@ -1655,6 +1657,7 @@ Optional Features: --enable-host-sharedbuild host code as shared libraries --disable-libquadmath-support disable libquadmath support for Fortran + --disable-gcov-tool disable the build of gcov-tool Optional Packages: --with-PACKAGE[=ARG]use PACKAGE [ARG=yes] @@ -8353,7 +8356,7 @@ fi for ac_header in limits.h stddef.h string.h strings.h stdlib.h time.h iconv.h \ fcntl.h unistd.h sys/file.h sys/time.h sys/mman.h \ sys/resource.h sys/param.h sys/times.h sys/stat.h \ -direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h +direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h ftw.h do : as_ac_Header=`$as_echo ac_cv_header_$ac_header | $as_tr_sh` ac_fn_c_check_header_preproc $LINENO $ac_header $as_ac_Header @@ -18033,7 +18036,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat conftest.$ac_ext _LT_EOF -#line 18036 configure +#line 18039 configure #include confdefs.h #if HAVE_DLFCN_H @@ -18139,7 +18142,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat 
conftest.$ac_ext _LT_EOF -#line 18142 configure +#line 18145 configure #include confdefs.h #if HAVE_DLFCN_H @@ -28116,6 +28119,33 @@ $as_echo #define ENABLE_LIBQUADMATH_SUPPORT 1 fi +# Check if gcov-tool can be built. +# Check whether --enable-gcov-tool was given. +if test ${enable_gcov_tool+set} = set; then : + enableval=$enable_gcov_tool; +fi + +if test x$enable_gcov_tool = xno; then + disable_gcov_tool=yes +else + ac_fn_c_check_header_preproc $LINENO ftw.h ac_cv_header_ftw_h +if test x$ac_cv_header_ftw_h = xyes; then : + disable_gcov_tool=no +else + case $host_os in + win32 | cygwin* | mingw32*) +disable_gcov_tool=no +
Re: [PATCH] PowerPC: Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV
Ping. On 19-08-2014 13:54, Adhemerval Zanella wrote: Ping. On 06-08-2014 17:21, Adhemerval Zanella wrote: On 01-08-2014 12:31, Joseph S. Myers wrote: On Thu, 31 Jul 2014, David Edelsohn wrote: Thanks for implementing the FENV support. The patch generally looks good to me. My one concern is a detail in the implementation of update. I do not have enough experience with GENERIC to verify the details and it seems like it is missing building an outer COMPOUND_EXPR containing update_mffs and the CALL_EXPR for update mtfsf. I suppose what's actually odd there is that you have + tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); + + tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, update_mffs); so you build a MODIFY_EXPR in void_type_node but then convert it with a VIEW_CONVERT_EXPR. If you'd built the MODIFY_EXPR in double_type_node then the VIEW_CONVERT_EXPR would be meaningful (the value of an assignment a = b being the new value of a), but reinterpreting a void value doesn't make sense. Or you could probably just use call_mffs directly in the VIEW_CONVERT_EXPR without explicitly creating the old_fenv variable. Thanks for the review Josephm. I have changed to avoid the void reinterpretation and use call_mffs directly. I have also removed the the mask generation in 'clear' from your previous message, it is now reusing the mas used in feholdexcept. The testcase patch is the same as before. Checked on both linux-powerpc64/powerpc64le and no regressions found. -- 2014-08-06 Adhemerval Zanella azane...@linux.vnet.ibm.com gcc: * config/rs6000/rs6000.c (rs6000_atomic_assign_expand_fenv): New function. gcc/testsuite: * gcc.dg/atomic/c11-atomic-exec-5.c (test_main_long_double_add_overflow): Define and run only for LDBL_MANT_DIG != 106. (test_main_complex_long_double_add_overflow): Likewise. (test_main_long_double_sub_overflow): Likewise. (test_main_complex_long_double_sub_overflow): Likewise. 
--- diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d088ff6..7d66eb1 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1631,6 +1631,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv /* Processor table. */ @@ -7,6 +33340,80 @@ emit_fusion_gpr_load (rtx *operands) return ; } +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ + +static void +rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + if (!TARGET_HARD_FLOAT || !TARGET_FPRS) +return; + + tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; + tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; + tree call_mffs = build_call_expr (mffs, 0); + + /* Generates the equivalent of feholdexcept (fenv_var) + + *fenv_var = __builtin_mffs (); + double fenv_hold; + *(uint64_t*)fenv_hold = *(uint64_t*)fenv_var 0x0007LL; + __builtin_mtfsf (0xff, fenv_hold); */ + + /* Mask to clear everything except for the rounding modes and non-IEEE + arithmetic flag. */ + const unsigned HOST_WIDE_INT hold_exception_mask = +HOST_WIDE_INT_UC (0x0007); + + tree fenv_var = create_tmp_var (double_type_node, NULL); + + tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); + + tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); + tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, +build_int_cst (uint64_type_node, hold_exception_mask)); + + tree fenv_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, fenv_llu_and); + + tree hold_mtfsf = build_call_expr (mtfsf, 2, +build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf); + + *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); + + /* Reload the value of fenv_hold to clear the exceptions. 
*/ + + *clear = build_call_expr (mtfsf, 2, +build_int_cst (unsigned_type_node, 0xff), fenv_mtfsf); + + /* Generates the equivalent of feupdateenv (fenv_var) + + double old_fenv = __builtin_mffs (); + double fenv_update; + *(uint64_t*)fenv_update = (*(uint64_t*)old 0x1f00LL) | +(*(uint64_t*)fenv_var 0x1ff80fff); + __builtin_mtfsf (0xff, fenv_update); */ + + const unsigned HOST_WIDE_INT update_exception_mask = +HOST_WIDE_INT_UC (0x1f00); + const unsigned HOST_WIDE_INT new_exception_mask = +HOST_WIDE_INT_UC (0x1ff80fff); + + tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, call_mffs); + tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, +old_llu,
Re: [C PATCH] Don't reject valid code with _Alignas (PR c/61053)
On Thu, May 8, 2014 at 11:19 AM, Marek Polacek pola...@redhat.com wrote: On Wed, May 07, 2014 at 11:31:38AM -0700, H.J. Lu wrote: OK, though I'm not sure if the lp64 conditions are right in the testcase It should be !ia32 instead of lp64. Ok, I changed lp64 to ! { ia32 } and committed the patch now. Marek The change is insufficient for x32, which has the same alignments for floating point types and the integer types with the same size as x86-64. This patch is needed for x32. OK for trunk and 4.8 branch? -- H.J. --- 2014-09-02 H.J. Lu hongjiu...@intel.com * gcc.dg/pr61053.c: Updated for x32. diff --git a/gcc/testsuite/gcc.dg/pr61053.c b/gcc/testsuite/gcc.dg/pr61053.c index 4fd5319..5557784 100644 --- a/gcc/testsuite/gcc.dg/pr61053.c +++ b/gcc/testsuite/gcc.dg/pr61053.c @@ -31,17 +31,17 @@ _Alignas (long double) int ild; _Alignas (char) long int lic; /* { dg-error cannot reduce alignment } */ _Alignas (short int) long int lis; /* { dg-error cannot reduce alignment } */ -_Alignas (int) long int lii; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ +_Alignas (int) long int lii; /* { dg-error cannot reduce alignment { target { ! { ia32 || x32 } } } } */ _Alignas (long int) long int lil; _Alignas (long long int) long int lill; -_Alignas (float) long int lif; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ +_Alignas (float) long int lif; /* { dg-error cannot reduce alignment { target { ! { ia32 || x32 } } } } */ _Alignas (double) long int lid; _Alignas (long double) long int lild; _Alignas (char) long long int llic; /* { dg-error cannot reduce alignment } */ _Alignas (short int) long long int llis; /* { dg-error cannot reduce alignment } */ _Alignas (int) long long int llii; /* { dg-error cannot reduce alignment { target { ! 
{ ia32 } } } } */ -_Alignas (long int) long long int llil; +_Alignas (long int) long long int llil; /* { dg-error cannot reduce alignment { target { x32 } } } */ _Alignas (long long int) long long int llill; _Alignas (float) long long int llif; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ _Alignas (double) long long int llid; @@ -59,7 +59,7 @@ _Alignas (long double) float fld; _Alignas (char) double dc; /* { dg-error cannot reduce alignment } */ _Alignas (short int) double ds; /* { dg-error cannot reduce alignment } */ _Alignas (int) double di; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ -_Alignas (long int) double dl; +_Alignas (long int) double dl; /* { dg-error cannot reduce alignment { target { x32 } } } */ _Alignas (long long int) double dll; _Alignas (float) double df; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ _Alignas (double) double dd;
Re: [C PATCH] Don't reject valid code with _Alignas (PR c/61053)
On Tue, Sep 2, 2014 at 3:29 PM, H.J. Lu hjl.to...@gmail.com wrote: On Thu, May 8, 2014 at 11:19 AM, Marek Polacek pola...@redhat.com wrote: On Wed, May 07, 2014 at 11:31:38AM -0700, H.J. Lu wrote: OK, though I'm not sure if the lp64 conditions are right in the testcase It should be !ia32 instead of lp64. Ok, I changed lp64 to ! { ia32 } and committed the patch now. Marek The change is insufficient for x32, which has the same alignments for floating point types and the integer types with the same size as x86-64. This patch is needed for x32. OK for trunk and 4.8 branch? -- H.J. --- 2014-09-02 H.J. Lu hongjiu...@intel.com * gcc.dg/pr61053.c: Updated for x32. Here is the patch as an attachment. -- H.J. 2014-09-02 H.J. Lu hongjiu...@intel.com * gcc.dg/pr61053.c: Updated for x32. diff --git a/gcc/testsuite/gcc.dg/pr61053.c b/gcc/testsuite/gcc.dg/pr61053.c index 4fd5319..5557784 100644 --- a/gcc/testsuite/gcc.dg/pr61053.c +++ b/gcc/testsuite/gcc.dg/pr61053.c @@ -31,17 +31,17 @@ _Alignas (long double) int ild; _Alignas (char) long int lic; /* { dg-error cannot reduce alignment } */ _Alignas (short int) long int lis; /* { dg-error cannot reduce alignment } */ -_Alignas (int) long int lii; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ +_Alignas (int) long int lii; /* { dg-error cannot reduce alignment { target { ! { ia32 || x32 } } } } */ _Alignas (long int) long int lil; _Alignas (long long int) long int lill; -_Alignas (float) long int lif; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ +_Alignas (float) long int lif; /* { dg-error cannot reduce alignment { target { ! { ia32 || x32 } } } } */ _Alignas (double) long int lid; _Alignas (long double) long int lild; _Alignas (char) long long int llic; /* { dg-error cannot reduce alignment } */ _Alignas (short int) long long int llis; /* { dg-error cannot reduce alignment } */ _Alignas (int) long long int llii; /* { dg-error cannot reduce alignment { target { ! 
{ ia32 } } } } */ -_Alignas (long int) long long int llil; +_Alignas (long int) long long int llil; /* { dg-error cannot reduce alignment { target { x32 } } } */ _Alignas (long long int) long long int llill; _Alignas (float) long long int llif; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ _Alignas (double) long long int llid; @@ -59,7 +59,7 @@ _Alignas (long double) float fld; _Alignas (char) double dc; /* { dg-error cannot reduce alignment } */ _Alignas (short int) double ds; /* { dg-error cannot reduce alignment } */ _Alignas (int) double di; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ -_Alignas (long int) double dl; +_Alignas (long int) double dl; /* { dg-error cannot reduce alignment { target { x32 } } } */ _Alignas (long long int) double dll; _Alignas (float) double df; /* { dg-error cannot reduce alignment { target { ! { ia32 } } } } */ _Alignas (double) double dd;
[PATCH 1/2] use rtx_code_label more
From: Trevor Saunders tsaund...@mozilla.com Hi, $subject bootstrapped + regtested on x86_64-unknown-linux-gnu, and run through config-list.mk. Will commit it shortly as preapproved by Jeff in http://gcc.gnu.org/ml/gcc-patches/2014-08/msg01310.html Trev gcc/ * asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c, config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md, config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c, config/mips/mips.md, config/nios2/nios2.c, config/pa/pa.c, config/s390/s390.c, config/s390/s390.md, config/sh/sh-mem.cc, config/sh/sh.c, config/sparc/sparc.c, dojump.c, function.c, optabs.c, stmt.c: Assign the result of gen_label_rtx to rtx_code_label * instead of rtx. --- gcc/ChangeLog | 11 +++ gcc/asan.c | 7 --- gcc/cfgexpand.c | 4 ++-- gcc/config/alpha/alpha.md | 4 ++-- gcc/config/arm/arm.c| 6 +++--- gcc/config/epiphany/epiphany.md | 2 +- gcc/config/h8300/h8300.c| 4 ++-- gcc/config/i386/i386.md | 16 gcc/config/m32r/m32r.c | 2 +- gcc/config/mcore/mcore.md | 4 ++-- gcc/config/mips/mips.c | 11 ++- gcc/config/mips/mips.md | 16 gcc/config/nios2/nios2.c| 2 +- gcc/config/pa/pa.c | 2 +- gcc/config/s390/s390.c | 24 gcc/config/s390/s390.md | 28 ++-- gcc/config/sh/sh-mem.cc | 34 +- gcc/config/sh/sh.c | 9 + gcc/config/sparc/sparc.c| 20 ++-- gcc/dojump.c| 8 gcc/function.c | 2 +- gcc/optabs.c| 5 ++--- gcc/stmt.c | 6 +++--- 23 files changed, 120 insertions(+), 107 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 22e69c0..3144019 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,16 @@ 2014-09-02 Trevor Saunders tsaund...@mozilla.com + * asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c, + config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md, + config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c, + config/mips/mips.md, config/nios2/nios2.c, config/pa/pa.c, + config/s390/s390.c, config/s390/s390.md, config/sh/sh-mem.cc, + config/sh/sh.c, config/sparc/sparc.c, dojump.c, 
function.c, optabs.c, + stmt.c: Assign the result of gen_label_rtx to rtx_code_label * instead + of rtx. + +2014-09-02 Trevor Saunders tsaund...@mozilla.com + * alloc-pool.c: Include coretypes.h. * cgraph.h, dbxout.c, dwarf2out.c, except.c, except.h, function.c, function.h, symtab.c, tree-cfg.c, tree-eh.c: Use hash_map and diff --git a/gcc/asan.c b/gcc/asan.c index 4ed9344..cf5de27 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -983,7 +983,8 @@ rtx_insn * asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, HOST_WIDE_INT *offsets, tree *decls, int length) { - rtx shadow_base, shadow_mem, ret, mem, orig_base, lab; + rtx shadow_base, shadow_mem, ret, mem, orig_base; + rtx_code_label *lab; rtx_insn *insns; char buf[30]; unsigned char shadow_bytes[4]; @@ -1174,10 +1175,10 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, /* Construct epilogue sequence. */ start_sequence (); - lab = NULL_RTX; + lab = NULL; if (use_after_return_class != -1) { - rtx lab2 = gen_label_rtx (); + rtx_code_label *lab2 = gen_label_rtx (); char c = (char) ASAN_STACK_MAGIC_USE_AFTER_RET; int very_likely = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); emit_cmp_and_jump_insns (orig_base, base, EQ, NULL_RTX, diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 0a1b4bf..7d11b1b 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -2007,7 +2007,7 @@ label_rtx_for_bb (basic_block bb ATTRIBUTE_UNUSED) return label_rtx (lab); } - rtx l = gen_label_rtx (); + rtx_code_label *l = gen_label_rtx (); lab_rtx_for_bb->put (bb, l); return l; } @@ -2469,7 +2469,7 @@ expand_asm_operands (tree string, tree outputs, tree inputs, enum machine_mode *inout_mode = XALLOCAVEC (enum machine_mode, noutputs); const char **constraints = XALLOCAVEC (const char *, noutputs + ninputs); int old_generating_concat_p = generating_concat_p; - rtx fallthru_label = NULL_RTX; + rtx_code_label *fallthru_label = NULL; /* An ASM with no outputs needs to be treated as volatile, for now. 
*/ if (noutputs == 0) diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 34ff1f0..8cf8a06 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -4907,8 +4907,8 @@ } else { - rtx out_label = 0;
[PATCH 2/2] use rtx_code_label in lab_rtx_for_bb hash map
From: Trevor Saunders tsaund...@mozilla.com Hi, $subject again bootstrapped + regtested on x86_64-unknown-linux-gnu, and run through config-list.mk. Will commit it shortly as preapproved by Jeff in http://gcc.gnu.org/ml/gcc-patches/2014-08/msg01310.html Trev gcc/ChangeLog: * cfgexpand.c (label_rtx_for_bb): Change type to hash_map<basic_block, rtx_code_label *> *. (expand_gimple_basic_block): Adjust. (pass_expand::execute): Likewise. --- gcc/ChangeLog | 7 +++ gcc/cfgexpand.c | 8 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3144019..ca786c2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,12 @@ 2014-09-02 Trevor Saunders tsaund...@mozilla.com + * cfgexpand.c (label_rtx_for_bb): Change type to + hash_map<basic_block, rtx_code_label *> *. + (expand_gimple_basic_block): Adjust. + (pass_expand::execute): Likewise. + +2014-09-02 Trevor Saunders tsaund...@mozilla.com + * asan.c, cfgexpand.c, config/alpha/alpha.md, config/arm/arm.c, config/epiphany/epiphany.md, config/h8300/h8300.c, config/i386/i386.md, config/m32r/m32r.c, config/mcore/mcore.md, config/mips/mips.c, diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 7d11b1b..db76897 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -1974,7 +1974,7 @@ maybe_dump_rtl_for_gimple_stmt (gimple stmt, rtx_insn *since) /* Maps the blocks that do not contain tree labels to rtx labels. */ -static hash_map<basic_block, rtx> *lab_rtx_for_bb; +static hash_map<basic_block, rtx_code_label *> *lab_rtx_for_bb; /* Returns the label_rtx expression for a label starting basic block BB. 
*/ @@ -1988,7 +1988,7 @@ label_rtx_for_bb (basic_block bb ATTRIBUTE_UNUSED) if (bb->flags & BB_RTL) return block_label (bb); - rtx *elt = lab_rtx_for_bb->get (bb); + rtx_code_label **elt = lab_rtx_for_bb->get (bb); if (elt) return *elt; @@ -4945,7 +4945,7 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls) stmt = NULL; } - rtx *elt = lab_rtx_for_bb->get (bb); + rtx_code_label **elt = lab_rtx_for_bb->get (bb); if (stmt || elt) { @@ -5815,7 +5815,7 @@ pass_expand::execute (function *fun) FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (fun)->succs) e->flags &= ~EDGE_EXECUTABLE; - lab_rtx_for_bb = new hash_map<basic_block, rtx>; + lab_rtx_for_bb = new hash_map<basic_block, rtx_code_label *>; FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR_FOR_FN (fun), next_bb) bb = expand_gimple_basic_block (bb, var_ret_seq != NULL_RTX); -- 2.1.0
[RFC] Tweak gcc.c-torture/execute/pr39228.c
Hi, gcc.c-torture/execute/pr39228.c fails with (test for excess errors) on SH for recent revisions. My gcc.log says: gcc.c-torture/execute/pr39228.c:20:43: warning: always_inline function might not be inlinable [-Wattributes] ... It looks like alpha has a similar issue: https://gcc.gnu.org/ml/gcc-testresults/2014-08/msg02660.html alpha and sh redefine dg-options to -mieee in the test case instead of the default dg-options -w and get the above warning. The patch below tweaks the test to fix it. Perhaps the first two lines are enough to avoid the error but avoiding the root cause of warnings would be better. Tested on i686-linux and sh4-linux. Regards, kaz -- * gcc.c-torture/execute/pr39228.c: Add -w option on sh*-*-* and alpha*-*-*. Add inline keyword to test functions. --- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-08-26 09:26:20.0 +0900 +++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-09-03 07:42:30.085524983 +0900 @@ -1,23 +1,23 @@ -/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */ +/* { dg-options -w -mieee { target sh*-*-* alpha*-*-* } } */ /* { dg-skip-if No Inf/NaN support { spu-*-* } * } */ extern void abort (void); -static int __attribute__((always_inline)) testf (float b) +static inline int __attribute__((always_inline)) testf (float b) { float c = 1.01f * b; return __builtin_isinff (c); } -static int __attribute__((always_inline)) test (double b) +static inline int __attribute__((always_inline)) test (double b) { double c = 1.01 * b; return __builtin_isinf (c); } -static int __attribute__((always_inline)) testl (long double b) +static inline int __attribute__((always_inline)) testl (long double b) { long double c = 1.01L * b;
Re: fix gcov regression
On 08/23/14 10:33, Nathan Sidwell wrote: Hi, this patch fixes a defect Jan found with firefox and its shared objects. We were inadvertently calling an externally visible and overridable symbol, rather than the local shared object's instance. This led to strangely sparse gcov results. I've taken the STRONG_ALIAS #define from glibc. I'm not 100% sure it's valid for all supported targets. Tested on x86_64-linux. I've not committed this patch because of (a) that, and (b) I'm about to emigrate, so likely to be unable to respond to any potential fallout in a timely manner. I've committed the patch now.
Re: [PATCH PR62151]Fix uninitialized register issue caused by distribute_notes in combine pass
On Tue, Sep 2, 2014 at 9:40 PM, Segher Boessenkool seg...@kernel.crashing.org wrote: On Tue, Sep 02, 2014 at 02:10:32PM +0200, Ulrich Weigand wrote: In any case, this test in can_combine_p rejects a combination for *two* different issues. One is the earlyclobber problem, which is what that 2004 thread was about, and which my patch back then relaxed for fixed hard register. However, this doesn't seem to apply to the example above; that is really about the second problem: don't substitute into a clobber. Right. I understand the reason why this particular substitution is rejected is simply that if it weren't, we'd be substituting flags:CC=cmp(r84:SI,0x1) into clobber flags:CC, resulting in clobber cmp(r84:SI,0x1), which is invalid RTL. I checked, and that is indeed what combine does. How silly. Now I guess this check could be relaxed if somewhere else in combine we'd recognize the substitution into a clobber and simply omit it in that case. Yeah. In the testcase, combine tries combining 76,77 (77 is that clobbering insn) and refuses it; then it tries 32,76,77 and refuses it; and then it tries 32,76,77,43 and allows it (it doesn't do this check at all, 77 is not i3, combine omits the clobber completely). Which is inconsistent. I guess it makes sense because this way it doesn't introduce any invalid instructions. But yes, how combine handles the clobber in this way may help combine the three instructions? Thanks, bin What a mess. Thanks for looking! Segher
Re: [RFC] Tweak gcc.c-torture/execute/pr39228.c
Hi, On Sep 3, 2014, at 2:42 AM, Kaz Kojima kkoj...@rr.iij4u.or.jp wrote: Hi, gcc.c-torture/execute/pr39228.c fails with (test for excess errors) on SH for recent revisions. My gcc.log says: gcc.c-torture/execute/pr39228.c:20:43: warning: always_inline function might not be inlinable [-Wattributes] ... It looks like alpha has a similar issue: https://gcc.gnu.org/ml/gcc-testresults/2014-08/msg02660.html alpha and sh redefine dg-options to -mieee in the test case instead of the default dg-options -w and get the above warning. The patch below tweaks the test to fix it. Perhaps the first two lines are enough to avoid the error but avoiding the root cause of warnings would be better. Tested on i686-linux and sh4-linux. -mieee should be the default on sh* and thus can be removed from the dg-options line, or is it not? If -mieee is still needed (for alpha) maybe it's better to use dg-additional-options instead? Cheers, Oleg Regards, kaz -- * gcc.c-torture/execute/pr39228.c: Add -w option on sh*-*-* and alpha*-*-*. Add inline keyword to test functions. --- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-08-26 09:26:20.0 +0900 +++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-09-03 07:42:30.085524983 +0900 @@ -1,23 +1,23 @@ -/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */ +/* { dg-options -w -mieee { target sh*-*-* alpha*-*-* } } */ /* { dg-skip-if No Inf/NaN support { spu-*-* } * } */ extern void abort (void); -static int __attribute__((always_inline)) testf (float b) +static inline int __attribute__((always_inline)) testf (float b) { float c = 1.01f * b; return __builtin_isinff (c); } -static int __attribute__((always_inline)) test (double b) +static inline int __attribute__((always_inline)) test (double b) { double c = 1.01 * b; return __builtin_isinf (c); } -static int __attribute__((always_inline)) testl (long double b) +static inline int __attribute__((always_inline)) testl (long double b) { long double c = 1.01L * b;
Re: [RFC] Tweak gcc.c-torture/execute/pr39228.c
Oleg Endo oleg.e...@t-online.de wrote: -mieee should be the default on sh* and thus can be removed from the dg-options line, or is it not? If -mieee is still needed (for alpha) maybe it's better to use dg-additional-options instead? Sure. The attached is a revised one. Regards, kaz -- * gcc.c-torture/execute/pr39228.c: Use dg-additional-options instead of dg-options and remove sh*-*-* from its target list. Add inline keyword to test functions. --- ORIG/trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c2014-08-26 09:26:20.0 +0900 +++ trunk/gcc/testsuite/gcc.c-torture/execute/pr39228.c 2014-09-03 14:16:23.313631715 +0900 @@ -1,23 +1,23 @@ -/* { dg-options -mieee { target sh*-*-* alpha*-*-* } } */ +/* { dg-additional-options -mieee { target alpha*-*-* } } */ /* { dg-skip-if No Inf/NaN support { spu-*-* } * } */ extern void abort (void); -static int __attribute__((always_inline)) testf (float b) +static inline int __attribute__((always_inline)) testf (float b) { float c = 1.01f * b; return __builtin_isinff (c); } -static int __attribute__((always_inline)) test (double b) +static inline int __attribute__((always_inline)) test (double b) { double c = 1.01 * b; return __builtin_isinf (c); } -static int __attribute__((always_inline)) testl (long double b) +static inline int __attribute__((always_inline)) testl (long double b) { long double c = 1.01L * b;