[PATCH i386 AVX512] [78/n] Use blend for inserting.
Hello, This patch extends insertion hook. AVX-512* tests on top of patch-set all pass under simulator. gcc/ * config/i386/i386.c (ix86_expand_vector_set): Handle V8DF, V8DI, V16SF, V16SI, V32HI, V64QI modes. -- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index fcccdc3..b20eabf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -40902,6 +40902,79 @@ half: emit_insn (gen_insert[j][i] (target, target, tmp)); return; +case V8DFmode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv8df (target, tmp, target, +force_reg (QImode, GEN_INT (1 << elt)))); + return; + } + else + break; +case V8DImode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv8di (target, tmp, target, +force_reg (QImode, GEN_INT (1 << elt)))); + return; + } + else + break; +case V16SFmode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv16sf (target, tmp, target, + force_reg (HImode, GEN_INT (1 << elt)))); + return; + } + else + break; +case V16SImode: + if (TARGET_AVX512F) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512f_blendmv16si (target, tmp, target, + force_reg (HImode, GEN_INT (1 << elt)))); + return; + } + else + break; +case V32HImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target, + force_reg (SImode, GEN_INT (1 << elt)))); + return; + } + else + break; +case V64QImode: + if (TARGET_AVX512F && TARGET_AVX512BW) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + 
gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target, + force_reg (DImode, GEN_INT (1 << elt)))); + return; + } + else + break; + default: break; }
[PATCH i386 AVX512] [79/n] Extend expand_mul_widen_hilo.
Hello, This patch extends expand_mul_widen_hilo to 512-bit QI,SI,HI modes. Bootstrapped and regtested gcc/ * config/i386/i386.c (ix86_expand_mul_widen_hilo): Handle V32HI, V16SI, V64QI modes. Is it ok for trunk? -- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ae64c44..945bc8d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -45610,6 +45610,9 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, case V16QImode: case V32QImode: +case V32HImode: +case V16SImode: +case V64QImode: t1 = gen_reg_rtx (wmode); t2 = gen_reg_rtx (wmode); ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
Re: PR debug/60655, debug loc expressions
Ping? https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00704.html -- Alan Modra Australia Development Lab, IBM
Re: [flag-types.h] don't assume 32-bit ints
On Wed, Oct 15, 2014 at 06:16:45PM -0400, DJ Delorie wrote: Since flag-types.h is used in target code, it needs to be -Wall-safe for targets with 16-bit int. OK? With a ChangeLog entry yes. --- gcc/flag-types.h (revision 216287) +++ gcc/flag-types.h (working copy) @@ -229,17 +229,17 @@ enum sanitize_code { SANITIZE_RETURN = 1 << 10, SANITIZE_SI_OVERFLOW = 1 << 11, SANITIZE_BOOL = 1 << 12, SANITIZE_ENUM = 1 << 13, SANITIZE_FLOAT_DIVIDE = 1 << 14, SANITIZE_FLOAT_CAST = 1 << 15, - SANITIZE_BOUNDS = 1 << 16, - SANITIZE_ALIGNMENT = 1 << 17, - SANITIZE_NONNULL_ATTRIBUTE = 1 << 18, - SANITIZE_RETURNS_NONNULL_ATTRIBUTE = 1 << 19, - SANITIZE_OBJECT_SIZE = 1 << 20, + SANITIZE_BOUNDS = 1UL << 16, + SANITIZE_ALIGNMENT = 1UL << 17, + SANITIZE_NONNULL_ATTRIBUTE = 1UL << 18, + SANITIZE_RETURNS_NONNULL_ATTRIBUTE = 1UL << 19, + SANITIZE_OBJECT_SIZE = 1UL << 20, SANITIZE_UNDEFINED = SANITIZE_SHIFT | SANITIZE_DIVIDE | SANITIZE_UNREACHABLE | SANITIZE_VLA | SANITIZE_NULL | SANITIZE_RETURN | SANITIZE_SI_OVERFLOW | SANITIZE_BOOL | SANITIZE_ENUM | SANITIZE_BOUNDS | SANITIZE_ALIGNMENT | SANITIZE_NONNULL_ATTRIBUTE | SANITIZE_RETURNS_NONNULL_ATTRIBUTE Jakub
Re: PR debug/60655, debug loc expressions
On Thu, Oct 16, 2014 at 05:25:57PM +1030, Alan Modra wrote: Ping? https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00704.html I think the simplification should be done when constructing the expressions, i.e. if possible in the simplification callback or so if it isn't performed at some level. Because otherwise, you construct the RTL all the way up into complex expressions, and then another simplification will, if there are simplifications e.g. very deep in the expressions, copy the rest all the way up, creating tons of GC garbage. So, please find the spot where we forget to simplify stuff, and put the simplification there. Jakub
Re: Check that unlinked uses do not contain ssa-names when renaming.
On 08/10/12 11:24, Richard Guenther wrote: On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote: Richard, attached patch checks that unlinked uses do not contain ssa-names when renaming. This assert triggers when compiling (without the fix) the PR54735 example. AFAIU, it was due to chance that we caught the PR54735 bug by hitting the verification failure, because the new vdef introduced by renaming happened to be the same name as the ssa name referenced in the invalid unlinked use (in terms of maybe_replace_use: rdef == use). The assert from this patch catches all cases that an unlinked use contains an ssa-name. Bootstrapped and reg-tested on x86_64 (Ada inclusive). OK for trunk? I don't think that is exactly what we should assert here ... (I thought about adding checking myself ...). What we'd want to assert is that before any new DEF is registered (which may re-allocate an SSA name) that no uses with SSA_NAME_IN_FREELIST appear. Thus, a light verification pass would be necessary at the beginning of update_ssa (which I queued onto my TODO list ...). We'd want that anyway to for example catch the case where a non-virtual operand is partially renamed. Richard, while developing a patch, I ran into the same 'no immediate_use list' verification error again, caused by an unlinked use containing an ssa-name. The verification error was caused by an error in my patch, but triggered by chance, by an unrelated change in the patch. I've tried to implement the 'light verification pass' you describe above, and I've checked that the error in my patch is found, also when I remove the trigger for the verification error from my patch. Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding removed, in order to ensure the code is active). OK for trunk? Thanks, - Tom 2014-10-16 Tom de Vries t...@codesourcery.com * tree-into-ssa.c (update_ssa): Assert that there's no ssa use operand with SSA_NAME_IN_FREELIST. 
diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index 01203de..227d5bb 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c update_ssa (unsigned update_flags) timevar_push (TV_TREE_SSA_INCREMENTAL); +#ifdef ENABLE_CHECKING + FOR_EACH_BB_FN (bb, cfun) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + + ssa_op_iter i; + use_operand_p use_p; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_ALL_USES) + { + tree use = USE_FROM_PTR (use_p); + if (TREE_CODE (use) != SSA_NAME) + continue; + + gcc_assert (!SSA_NAME_IN_FREE_LIST (use)); + } + } +} +#endif + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nUpdating SSA:\n"); -- 1.9.1
Re: [PATCH i386 AVX512] [78/n] Use blend for inserting.
On Thu, Oct 16, 2014 at 10:24:45AM +0400, Kirill Yukhin wrote: Hello, This patch extends insertion hook. AVX-512* tests on top of patch-set all pass under simulator. gcc/ * config/i386/i386.c (ix86_expand_vector_set): Handle V8DF, V8DI, V16SF, V16SI, V32HI, V64QI modes. Just a ChangeLog comment style (seen in several entries you've committed and several posted patches). Please don't put a line break right after the filename if the (functionname): part fits nicely on the same line, the description can be wrapped anywhere as appropriate. In this case, * config/i386/i386.c (ix86_expand_vector_set): Handle V8DF, V8DI, V16SF, V16SI, V32HI, V64QI modes. is shorter and more readable. Other than that, this particular patch LGTM (unless we'd want for the 4 mostly repetitious cases add a common handling spot, which would need the gen fnpointer and kmode vars set before goto), but I'll leave it to Uros to ack it. Jakub
Re: NRV with address taken
On Thu, Oct 16, 2014 at 07:37:18AM +0200, Marc Glisse wrote: Hello, the attached one-liner passed bootstrap+testsuite (really all languages) on x86_64-linux-gnu (I got an extra pass of unix/-m32: os but I assume that the failure with trunk was random). The current code is a bit weird: we bail out if either result or found is TREE_ADDRESSABLE, but then the variable replacement includes: TREE_ADDRESSABLE (result) |= TREE_ADDRESSABLE (found); (modified recently, it was a plain assignment before) I mostly ran the testsuite to find a testcase showing why found should not have its address taken, so if someone wants to add one (or at least a comment in tree-nrv.c), that would be good. I'd worry if both result and found are address taken before the pass, then trying to merge them together might mean something meant to have different addresses collapses into the same object. 2014-10-16 Marc Glisse marc.gli...@inria.fr * tree-nrv.c (pass_nrv::execute): Don't disable when address is taken. -- Marc Glisse Index: gcc/tree-nrv.c === --- gcc/tree-nrv.c(revision 216286) +++ gcc/tree-nrv.c(working copy) @@ -210,21 +210,20 @@ pass_nrv::execute (function *fun) return 0; } else found = rhs; /* The returned value must be a local automatic variable of the same type and alignment as the function's result. */ if (TREE_CODE (found) != VAR_DECL || TREE_THIS_VOLATILE (found) || !auto_var_in_fn_p (found, current_function_decl) - || TREE_ADDRESSABLE (found) || DECL_ALIGN (found) > DECL_ALIGN (result) || !useless_type_conversion_p (result_type, TREE_TYPE (found))) return 0; } else if (gimple_has_lhs (stmt)) { tree addr = get_base_address (gimple_get_lhs (stmt)); /* If there's any MODIFY of component of RESULT, then bail out. */ Jakub
Re: Towards GNU11
On Wed, Oct 15, 2014 at 12:08 PM, Marek Polacek pola...@redhat.com wrote: On Tue, Oct 14, 2014 at 09:23:29AM +0200, Marek Polacek wrote: The consensus seems to be to go forward with this change. I will commit the patch in 24 hours unless I hear objections. I made the change. Please report any fallout to me. Most of the graphite tests don't compile with -std=c11 for me. FAIL: gcc.dg/graphite/id-1.c (test for excess errors) FAIL: gcc.dg/graphite/id-13.c (test for excess errors) FAIL: gcc.dg/graphite/id-17.c (test for excess errors) FAIL: gcc.dg/graphite/id-2.c (test for excess errors) FAIL: gcc.dg/graphite/id-23.c (test for excess errors) FAIL: gcc.dg/graphite/id-26.c (test for excess errors) FAIL: gcc.dg/graphite/id-4.c (test for excess errors) FAIL: gcc.dg/graphite/id-8.c (test for excess errors) FAIL: gcc.dg/graphite/id-pr43464-1.c (test for excess errors) FAIL: gcc.dg/graphite/id-pr43464.c (test for excess errors) FAIL: gcc.dg/graphite/id-pr45230-1.c (test for excess errors) FAIL: gcc.dg/graphite/id-pr45230.c (test for excess errors) FAIL: gcc.dg/graphite/id-pr45231.c (test for excess errors) FAIL: gcc.dg/graphite/pr37485.c (test for excess errors) FAIL: gcc.dg/graphite/pr38073.c (test for excess errors) FAIL: gcc.dg/graphite/pr38125.c (test for excess errors) FAIL: gcc.dg/graphite/pr38409.c (test for excess errors) FAIL: gcc.dg/graphite/pr38413.c (test for excess errors) FAIL: gcc.dg/graphite/pr38500.c (test for excess errors) FAIL: gcc.dg/graphite/pr38510.c (test for excess errors) FAIL: gcc.dg/graphite/pr38786.c (test for excess errors) FAIL: gcc.dg/graphite/pr39260.c (test for excess errors) FAIL: gcc.dg/graphite/pr42284.c (test for excess errors) FAIL: gcc.dg/graphite/pr42914.c (test for excess errors) FAIL: gcc.dg/graphite/pr46404-1.c (test for excess errors) FAIL: gcc.dg/graphite/pr60979.c (test for excess errors) FAIL: gcc.dg/graphite/scop-19.c (test for excess errors) Richard. Enjoy. Marek
Re: [lto] don't assume pointer size
On Thu, Oct 16, 2014 at 12:07 AM, DJ Delorie d...@redhat.com wrote: In the event that pointer sizes aren't powers of two, choose a more suitable alignment than (unsigned)(-1), which results in HUGE file sizes. Ok? Ok. Thanks, Richard. Index: gcc/lto/lto-object.c === --- gcc/lto/lto-object.c(revision 216287) +++ gcc/lto/lto-object.c(working copy) @@ -335,13 +335,13 @@ lto_obj_begin_section (const char *name) lo = (struct lto_simple_object *) current_out_file; gcc_assert (lo != NULL && lo->sobj_r == NULL && lo->sobj_w != NULL && lo->section == NULL); - align = exact_log2 (POINTER_SIZE / BITS_PER_UNIT); + align = ceil_log2 (POINTER_SIZE_UNITS); lo->section = simple_object_write_create_section (lo->sobj_w, name, align, &errmsg, &err); if (lo->section == NULL) { if (err == 0) fatal_error ("%s", errmsg);
Re: [v3] Minimally exercise the other alias_decls in type_traits
... the below completes the work, no -std=gnu++0x anymore. Paolo. / 2014-10-16 Paolo Carlini paolo.carl...@oracle.com * testsuite/lib/libstdc++.exp: Prefer -std=gnu++11. * testsuite/20_util/add_lvalue_reference/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/add_rvalue_reference/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/addressof/1.cc: Likewise. * testsuite/20_util/addressof/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/aligned_storage/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/aligned_storage/value.cc: Likewise. * testsuite/20_util/allocator_traits/members/ allocate_hint.cc: Likewise. * testsuite/20_util/allocator_traits/members/construct.cc: Likewise. * testsuite/20_util/allocator_traits/members/destroy.cc: Likewise. * testsuite/20_util/allocator_traits/members/max_size.cc: Likewise. * testsuite/20_util/allocator_traits/members/select.cc: Likewise. * testsuite/20_util/allocator_traits/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/allocator_traits/requirements/ typedefs.cc: Likewise. * testsuite/20_util/bad_function_call/ cons_virtual_derivation.cc: Likewise. * testsuite/20_util/bind/35569.cc: Likewise. * testsuite/20_util/bind/38889.cc: Likewise. * testsuite/20_util/bind/42593.cc: Likewise. * testsuite/20_util/bind/45924.cc: Likewise. * testsuite/20_util/bind/48698.cc: Likewise. * testsuite/20_util/bind/49058_1.cc: Likewise. * testsuite/20_util/bind/49058_2.cc: Likewise. * testsuite/20_util/bind/all_bound.cc: Likewise. * testsuite/20_util/bind/conv_result.cc: Likewise. * testsuite/20_util/bind/cv_quals.cc: Likewise. * testsuite/20_util/bind/cv_quals_2.cc: Likewise. * testsuite/20_util/bind/cv_quals_3.cc: Likewise. * testsuite/20_util/bind/move.cc: Likewise. * testsuite/20_util/bind/nested.cc: Likewise. * testsuite/20_util/bind/placeholders.cc: Likewise. * testsuite/20_util/bind/ref.cc: Likewise. 
* testsuite/20_util/bind/ref2.cc: Likewise. * testsuite/20_util/bind/ref_neg.cc: Likewise. * testsuite/20_util/bind/socket.cc: Likewise. * testsuite/20_util/common_type/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/common_type/requirements/ typedefs-1.cc: Likewise. * testsuite/20_util/conditional/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/conditional/requirements/typedefs.cc: Likewise. * testsuite/20_util/decay/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/declval/requirements/1.cc: Likewise. * testsuite/20_util/declval/requirements/1_neg.cc: Likewise. * testsuite/20_util/default_delete/48631_neg.cc: Likewise. * testsuite/20_util/default_delete/cons/constexpr.cc: Likewise. * testsuite/20_util/duration/arithmetic/1.cc: Likewise. * testsuite/20_util/duration/arithmetic/2.cc: Likewise. * testsuite/20_util/duration/arithmetic/constexpr.cc: Likewise. * testsuite/20_util/duration/arithmetic/dr2020.cc: Likewise. * testsuite/20_util/duration/arithmetic/dr934-1.cc: Likewise. * testsuite/20_util/duration/arithmetic/dr934-2.cc: Likewise. * testsuite/20_util/duration/comparison_operators/1.cc: Likewise. * testsuite/20_util/duration/comparison_operators/ constexpr.cc: Likewise. * testsuite/20_util/duration/cons/1.cc: Likewise. * testsuite/20_util/duration/cons/1_neg.cc: Likewise. * testsuite/20_util/duration/cons/2.cc: Likewise. * testsuite/20_util/duration/cons/constexpr.cc: Likewise. * testsuite/20_util/duration/cons/dr974_neg.cc: Likewise. * testsuite/20_util/duration/requirements/ constexpr_functions.cc: Likewise. * testsuite/20_util/duration/requirements/ explicit_instantiation/explicit_instantiation.cc: Likewise. * testsuite/20_util/duration/requirements/typedefs_neg1.cc: Likewise. * testsuite/20_util/duration/requirements/typedefs_neg2.cc: Likewise. * testsuite/20_util/duration/requirements/typedefs_neg3.cc: Likewise. * testsuite/20_util/duration_cast/constexpr.cc: Likewise. 
* testsuite/20_util/enable_if/requirements/ explicit_instantiation.cc: Likewise. * testsuite/20_util/enable_if/requirements/typedefs.cc: Likewise. * testsuite/20_util/enable_if/requirements/typedefs_neg.cc: Likewise. * testsuite/20_util/enable_shared_from_this/cons/constexpr.cc: Likewise. * testsuite/20_util/enable_shared_from_this/requirements/
Re: Check that unlinked uses do not contain ssa-names when renaming.
On Thu, Oct 16, 2014 at 9:20 AM, Tom de Vries tom_devr...@mentor.com wrote: On 08/10/12 11:24, Richard Guenther wrote: On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote: Richard, attached patch checks that unlinked uses do not contain ssa-names when renaming. This assert triggers when compiling (without the fix) the PR54735 example. AFAIU, it was due to chance that we caught the PR54735 bug by hitting the verification failure, because the new vdef introduced by renaming happened to be the same name as the ssa name referenced in the invalid unlinked use (in terms of maybe_replace_use: rdef == use). The assert from this patch catches all cases that an unlinked use contains an ssa-name. Bootstrapped and reg-tested on x86_64 (Ada inclusive). OK for trunk? I don't think that is exactly what we should assert here ... (I thought about adding checking myself ...). What we'd want to assert is that before any new DEF is registered (which may re-allocate an SSA name) that no uses with SSA_NAME_IN_FREELIST appear. Thus, a light verification pass would be necessary at the beginning of update_ssa (which I queued onto my TODO list ...). We'd want that anyway to for example catch the case where a non-virtual operand is partially renamed. Richard, while developing a patch, I ran into the same 'no immediate_use list' verification error again, caused by an unlinked use containing an ssa-name. The verification error was caused by an error in my patch, but triggered by chance, by an unrelated change in the patch. I've tried to implement the 'light verification pass' you describe above, and I've checked that the error in my patch is found, also when I remove the trigger for the verification error from my patch. Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding removed, in order to ensure the code is active). OK for trunk? 
Ok with changing the gcc_assert to if (SSA_NAME_IN_FREE_LIST (use)) { error ("statement uses released SSA name"); debug_gimple_stmt (stmt); err = true; } and after checking all stmts if (err) internal_error ("cannot update SSA form"); you might want to push/pop TV_TREE_STMT_VERIFY around all this as well. Thanks, Richard. Thanks, - Tom
Re: NRV with address taken
On Thu, Oct 16, 2014 at 9:31 AM, Jakub Jelinek ja...@redhat.com wrote: On Thu, Oct 16, 2014 at 07:37:18AM +0200, Marc Glisse wrote: Hello, the attached one-liner passed bootstrap+testsuite (really all languages) on x86_64-linux-gnu (I got an extra pass of unix/-m32: os but I assume that the failure with trunk was random). The current code is a bit weird: we bail out if either result or found is TREE_ADDRESSABLE, but then the variable replacement includes: TREE_ADDRESSABLE (result) |= TREE_ADDRESSABLE (found); (modified recently, it was a plain assignment before) I mostly ran the testsuite to find a testcase showing why found should not have its address taken, so if someone wants to add one (or at least a comment in tree-nrv.c), that would be good. Does this fix PR63537? I'd worry if both result and found are address taken before the pass, then trying to merge them together might mean something meant to have different addresses collapses into the same object. I'd not worry about that. But I think what the code tries to avoid is failing to adjust a use. But I can't think of a case that isn't handled if it properly replaces uses in address-taking operations (and asms). For example it fails to walk PHI nodes where var can appear as argument. Otherwise it relies on walk_gimple_op and walk_tree which should work. The other thing is aliasing though - if 'found' is TREE_ADDRESSABLE then points-to sets may contain 'found' but they are not adjusted to contain 'result' afterwards. Thus consider X a; X *p = &a; a.x = 1; p->x = ...; ... = a.x; return a; where after replacing 'a' with 'result' p->x will no longer alias the store that now looks like result.x and thus we'd happily CSE result.x across the pointer store. Now NRV runs quite late but we do preserve points-to information to RTL (and RTL expansion handles stack slot sharing fine with points-to sets - but we'd need to handle NRV the same here). So ... unfortunately the patch is not safe as-is. Richard. 
2014-10-16 Marc Glisse marc.gli...@inria.fr * tree-nrv.c (pass_nrv::execute): Don't disable when address is taken. -- Marc Glisse Index: gcc/tree-nrv.c === --- gcc/tree-nrv.c(revision 216286) +++ gcc/tree-nrv.c(working copy) @@ -210,21 +210,20 @@ pass_nrv::execute (function *fun) return 0; } else found = rhs; /* The returned value must be a local automatic variable of the same type and alignment as the function's result. */ if (TREE_CODE (found) != VAR_DECL || TREE_THIS_VOLATILE (found) || !auto_var_in_fn_p (found, current_function_decl) - || TREE_ADDRESSABLE (found) || DECL_ALIGN (found) > DECL_ALIGN (result) || !useless_type_conversion_p (result_type, TREE_TYPE (found))) return 0; } else if (gimple_has_lhs (stmt)) { tree addr = get_base_address (gimple_get_lhs (stmt)); /* If there's any MODIFY of component of RESULT, then bail out. */ Jakub
[PATCH 0/17] KASan 4.9 backport
Hi all, As discussed in https://gcc.gnu.org/ml/gcc/2014-09/msg00234.html , this patchset backports mainline patches necessary for Kernel ASan in GCC 4.9 (gcc-4_9-branch). The patchset consists of * Asan headers installation (1 patch) * __asan_loadN/__asan_storeN support (3 patches) * instrumentation with calls support (1 patch) * optimization of strlen instrumentation (1 patch) * Kasan support (3 patches) * move inlining to sanopt (1 patch) * bugfixes (7 patches) To my knowledge it does not contain any changes that would influence ABI of generated code. The code was bootstrapped and regtested on x64 (I only tested the net result, not each patch in isolation). -Y
[PATCH 1/17] Install asan_interface.h
This patch adds support for asan_interface.h installation (it's required by tests in other patches). One change compared to mainline: asan_interface.h in 4.9 isn't C friendly (it uses bool type) so I had to replace bool with unsigned char in tests. The actual value isn't used by the test so I believe this shouldn't influence portability in any way. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-14 Yury Gribov y.gri...@samsung.com PR sanitizer/61100 * Makefile.am (nodist_saninclude_HEADERS): Install public headers. * Makefile.in: Regenerate. * c-c++-common/asan/asan-interface-1.c: New test. * lib/asan-dg.exp (asan_include_flags): New function. (asan_init): Call asan_include_flags to obtain path to sanitizer headers. diff --git a/gcc/testsuite/c-c++-common/asan/asan-interface-1.c b/gcc/testsuite/c-c++-common/asan/asan-interface-1.c new file mode 100644 index 000..55203ec --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/asan-interface-1.c @@ -0,0 +1,17 @@ +/* Check that interface headers work. */ + +/* { dg-do run { target { *-*-linux* } } } */ + +#ifndef __cplusplus +#define bool unsigned char +#endif + +#include sanitizer/asan_interface.h + +int main() { + char tmp; + if (__asan_address_is_poisoned((volatile char *)tmp + 1)) +return 0; + return 1; +} + diff --git a/gcc/testsuite/lib/asan-dg.exp b/gcc/testsuite/lib/asan-dg.exp index 9ba39db..7a12160 100644 --- a/gcc/testsuite/lib/asan-dg.exp +++ b/gcc/testsuite/lib/asan-dg.exp @@ -23,6 +23,21 @@ proc check_effective_target_faddress_sanitizer {} { } -fsanitize=address] } +proc asan_include_flags {} { +global srcdir +global TESTING_IN_BUILD_TREE + +set flags + +if { [is_remote host] || ! [info exists TESTING_IN_BUILD_TREE] } { + return ${flags} +} + +set flags -I$srcdir/../../libsanitizer/include + +return $flags +} + # # asan_link_flags -- compute library path and flags to find libasan. 
# (originally from g++.exp) @@ -80,17 +95,19 @@ proc asan_init { args } { } } +set include_flags [asan_include_flags] + if [info exists TEST_ALWAYS_FLAGS] { set asan_saved_TEST_ALWAYS_FLAGS $TEST_ALWAYS_FLAGS } if [info exists ALWAYS_CXXFLAGS] { set ALWAYS_CXXFLAGS [concat {ldflags=$link_flags} $ALWAYS_CXXFLAGS] - set ALWAYS_CXXFLAGS [concat {additional_flags=-fsanitize=address -g} $ALWAYS_CXXFLAGS] + set ALWAYS_CXXFLAGS [concat {additional_flags=-fsanitize=address -g $include_flags} $ALWAYS_CXXFLAGS] } else { if [info exists TEST_ALWAYS_FLAGS] { - set TEST_ALWAYS_FLAGS $link_flags -fsanitize=address -g $TEST_ALWAYS_FLAGS + set TEST_ALWAYS_FLAGS $link_flags -fsanitize=address -g $include_flags $TEST_ALWAYS_FLAGS } else { - set TEST_ALWAYS_FLAGS $link_flags -fsanitize=address -g + set TEST_ALWAYS_FLAGS $link_flags -fsanitize=address -g $include_flags } } if { $link_flags != } { diff --git a/libsanitizer/Makefile.am b/libsanitizer/Makefile.am index b0dc582..6b0c571 100644 --- a/libsanitizer/Makefile.am +++ b/libsanitizer/Makefile.am @@ -1,7 +1,13 @@ ACLOCAL_AMFLAGS = -I .. 
-I ../config +sanincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include/sanitizer + +nodist_saninclude_HEADERS = + if SANITIZER_SUPPORTED SUBDIRS = sanitizer_common +nodist_saninclude_HEADERS += \ + include/sanitizer/common_interface_defs.h if !USING_MAC_INTERPOSE SUBDIRS += interception endif @@ -9,6 +15,9 @@ if LIBBACKTRACE_SUPPORTED SUBDIRS += libbacktrace endif SUBDIRS += lsan asan ubsan +nodist_saninclude_HEADERS += \ + include/sanitizer/lsan_interface.h \ + include/sanitizer/asan_interface.h if TSAN_SUPPORTED SUBDIRS += tsan endif diff --git a/libsanitizer/Makefile.in b/libsanitizer/Makefile.in index 60cbe2e..0b89245 100644 --- a/libsanitizer/Makefile.in +++ b/libsanitizer/Makefile.in @@ -35,9 +35,12 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -@SANITIZER_SUPPORTED_TRUE@@USING_MAC_INTERPOSE_FALSE@am__append_1 = interception -@LIBBACKTRACE_SUPPORTED_TRUE@@SANITIZER_SUPPORTED_TRUE@am__append_2 = libbacktrace -@SANITIZER_SUPPORTED_TRUE@@TSAN_SUPPORTED_TRUE@am__append_3 = tsan +@SANITIZER_SUPPORTED_TRUE@am__append_1 = include/sanitizer/common_interface_defs.h \ +@SANITIZER_SUPPORTED_TRUE@ include/sanitizer/lsan_interface.h \ +@SANITIZER_SUPPORTED_TRUE@ include/sanitizer/asan_interface.h +@SANITIZER_SUPPORTED_TRUE@@USING_MAC_INTERPOSE_FALSE@am__append_2 = interception +@LIBBACKTRACE_SUPPORTED_TRUE@@SANITIZER_SUPPORTED_TRUE@am__append_3 = libbacktrace +@SANITIZER_SUPPORTED_TRUE@@TSAN_SUPPORTED_TRUE@am__append_4 = tsan subdir = . DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/configure $(am__configure_deps) \ @@ -98,8 +101,9 @@ am__nobase_list = $(am__nobase_strip_setup); \ am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' -am__installdirs =
[PATCH 3/17] Instrumentation of unaligned types
Further work on __asan_loadN/__asan_storeN. I removed the tests (misalign-1.c, misalign-2.c) because (as mentioned in comments for preceeding patch) __asan_loadN/__asan_storeN are disabled for userspace. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-30 Jakub Jelinek ja...@redhat.com * asan.c (report_error_func): Add SLOW_P argument, use BUILT_IN_ASAN_*_N if set. (build_check_stmt): Likewise. (instrument_derefs): If T has insufficient alignment, force same handling as for odd sizes. diff --git a/gcc/asan.c b/gcc/asan.c index 1bba680..820d8ef 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1319,7 +1319,7 @@ asan_protect_global (tree decl) IS_STORE is either 1 (for a store) or 0 (for a load). */ static tree -report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes) +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes, bool slow_p) { static enum built_in_function report[2][6] = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2, @@ -1329,7 +1329,8 @@ report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes) BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8, BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } }; if ((size_in_bytes (size_in_bytes - 1)) != 0 - || size_in_bytes 16) + || size_in_bytes 16 + || slow_p) return builtin_decl_implicit (report[is_store][5]); return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]); } @@ -1508,7 +1509,8 @@ build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location, static void build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, - bool before_p, bool is_store, HOST_WIDE_INT size_in_bytes) + bool before_p, bool is_store, HOST_WIDE_INT size_in_bytes, + bool slow_p = false) { gimple_stmt_iterator gsi; basic_block then_bb, else_bb; @@ -1522,9 +1524,15 @@ build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, HOST_WIDE_INT real_size_in_bytes = size_in_bytes; tree sz_arg = NULL_TREE; - if 
((size_in_bytes (size_in_bytes - 1)) != 0 - || size_in_bytes 16) -real_size_in_bytes = 1; + if (size_in_bytes == 1) +slow_p = false; + else if ((size_in_bytes (size_in_bytes - 1)) != 0 + || size_in_bytes 16 + || slow_p) +{ + real_size_in_bytes = 1; + slow_p = true; +} /* Get an iterator on the point where we can add the condition statement for the instrumentation. */ @@ -1582,8 +1590,8 @@ build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, t = gimple_assign_lhs (gimple_seq_last (seq)); gimple_seq_set_location (seq, location); gsi_insert_seq_after (gsi, seq, GSI_CONTINUE_LINKING); - /* For weird access sizes, check first and last byte. */ - if (real_size_in_bytes != size_in_bytes) + /* For weird access sizes or misaligned, check first and last byte. */ + if (slow_p) { g = gimple_build_assign_with_ops (PLUS_EXPR, make_ssa_name (uintptr_type, NULL), @@ -1626,7 +1634,7 @@ build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, /* Generate call to the run-time library (e.g. __asan_report_load8). */ gsi = gsi_start_bb (then_bb); - g = gimple_build_call (report_error_func (is_store, size_in_bytes), + g = gimple_build_call (report_error_func (is_store, size_in_bytes, slow_p), sz_arg ? 2 : 1, base_addr, sz_arg); gimple_set_location (g, location); gsi_insert_after (gsi, g, GSI_NEW_STMT); @@ -1723,8 +1731,31 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, base = build_fold_addr_expr (t); if (!has_mem_ref_been_instrumented (base, size_in_bytes)) { + bool slow_p = false; + if (size_in_bytes 1) + { + if ((size_in_bytes (size_in_bytes - 1)) != 0 + || size_in_bytes 16) + slow_p = true; + else + { + unsigned int align = get_object_alignment (t); + if (align size_in_bytes * BITS_PER_UNIT) + { + /* On non-strict alignment targets, if + 16-byte access is just 8-byte aligned, + this will result in misaligned shadow + memory 2 byte load, but otherwise can + be handled using one read. 
*/ + if (size_in_bytes != 16 + || STRICT_ALIGNMENT + || align < 8 * BITS_PER_UNIT) + slow_p = true; + } + } + } build_check_stmt (location, base, iter, /*before_p=*/true, - is_store, size_in_bytes); + is_store, size_in_bytes, slow_p); update_mem_ref_hash_table (base, size_in_bytes); update_mem_ref_hash_table (t, size_in_bytes); }
[committed] gnu11 fallout: i686
The following is a patch by Jakub that ought to fix gnu11 fallout on i686. Applying to trunk. 2014-10-16 Jakub Jelinek ja...@redhat.com Marek Polacek pola...@redhat.com * gcc.dg/20020122-2.c: Use dg-additional-options. Fix implicit declarations. * gcc.dg/pr32176.c: Likewise. * gcc.dg/builtin-apply4.c: Use dg-additional-options. * gcc.dg/ia64-sync-1.c: Fix implicit declarations. * gcc.dg/ia64-sync-2.c: Likewise. * gcc.dg/ia64-sync-3.c: Likewise. * gcc.dg/sync-2.c: Likewise. * gcc.dg/sync-3.c: Likewise. * gcc.target/i386/990524-1.c: Likewise. * gcc.target/i386/avx512f-pr57233.c: Likewise. * gcc.target/i386/avx512f-typecast-1.c: Likewise. * gcc.target/i386/pr26826.c: Likewise. * gcc.target/i386/pr37184.c: Likewise. * gcc.target/i386/20060125-1.c: Fix defaulting to int. * gcc.target/i386/20060125-2.c: Likewise. * gcc.target/i386/memcpy-1.c: Likewise. * gcc.target/i386/pr40934.c: Likewise. * gcc.target/i386/sse-5.c: Likewise. * gcc.target/i386/stackalign/asm-1.c: Likewise. * gcc.target/i386/vectorize4.c: Likewise. * gcc.target/i386/980312-1.c: Fix defaulting to int. Use -fgnu89-inline. * gcc.target/i386/980313-1.c: Likewise. * gcc.target/i386/builtin-apply-mmx.c: Use -fgnu89-inline. * gcc.target/i386/crc32-2.c: Use -std=gnu89. * gcc.target/i386/crc32-3.c: Likewise. * gcc.target/i386/intrinsics_3.c: Likewise. * gcc.target/i386/loop-1.c: Likewise. * gcc.target/i386/pr44948-2a.c: Likewise. * gcc.target/i386/pr47564.c: Likewise. * gcc.target/i386/pr50712.c: Likewise. * gcc.target/i386/stackalign/return-2.c: Likewise. 
diff --git gcc/testsuite/gcc.dg/20020122-2.c gcc/testsuite/gcc.dg/20020122-2.c index 2499221..684ea5c 100644 --- gcc/testsuite/gcc.dg/20020122-2.c +++ gcc/testsuite/gcc.dg/20020122-2.c @@ -3,9 +3,10 @@ /* { dg-do compile } */ /* { dg-options -O2 -fprefetch-loop-arrays -w } */ -/* { dg-options -O2 -fprefetch-loop-arrays -march=athlon { target { { i?86-*-* x86_64-*-* } ia32 } } } */ +/* { dg-additional-options -march=athlon { target { { i?86-*-* x86_64-*-* } ia32 } } } */ extern int access( char* ); +extern int strcmp(const char *s1, const char *s2); extern int a(); char* foocp(); diff --git gcc/testsuite/gcc.dg/builtin-apply4.c gcc/testsuite/gcc.dg/builtin-apply4.c index c2cedfb..b548df3 100644 --- gcc/testsuite/gcc.dg/builtin-apply4.c +++ gcc/testsuite/gcc.dg/builtin-apply4.c @@ -1,6 +1,6 @@ /* PR tree-optimization/20076 */ /* { dg-options -O2 -Wmissing-noreturn -fgnu89-inline } */ -/* { dg-options -O2 -mno-mmx { target { { i?86-*-* x86_64-*-* } ia32 } } } */ +/* { dg-additional-options -mno-mmx { target { { i?86-*-* x86_64-*-* } ia32 } } } */ /* { dg-do run } */ extern void abort (void); diff --git gcc/testsuite/gcc.dg/ia64-sync-1.c gcc/testsuite/gcc.dg/ia64-sync-1.c index 2cfc144..ce83a84 100644 --- gcc/testsuite/gcc.dg/ia64-sync-1.c +++ gcc/testsuite/gcc.dg/ia64-sync-1.c @@ -13,6 +13,7 @@ __extension__ typedef __SIZE_TYPE__ size_t; extern void abort (void); extern void *memcpy (void *, const void *, size_t); +extern int memcmp (const void *, const void *, size_t); static int AI[12]; static int init_noret_si[12] = { 0, 0, 0, 1, 0, 0, 0 , 0 , -1, 0, 0, -1 }; diff --git gcc/testsuite/gcc.dg/ia64-sync-2.c gcc/testsuite/gcc.dg/ia64-sync-2.c index e6f4cad..3dc1035 100644 --- gcc/testsuite/gcc.dg/ia64-sync-2.c +++ gcc/testsuite/gcc.dg/ia64-sync-2.c @@ -13,6 +13,7 @@ __extension__ typedef __SIZE_TYPE__ size_t; extern void abort (void); extern void *memcpy (void *, const void *, size_t); +extern int memcmp (const void *, const void *, size_t); static int AI[18]; static 
int init_si[18] = { 0,0,0,1,0,0, 0,0 ,-1,0,0,-1,0,0 ,-1,0,0,-1 }; diff --git gcc/testsuite/gcc.dg/ia64-sync-3.c gcc/testsuite/gcc.dg/ia64-sync-3.c index 621e902..ad27169 100644 --- gcc/testsuite/gcc.dg/ia64-sync-3.c +++ gcc/testsuite/gcc.dg/ia64-sync-3.c @@ -10,6 +10,7 @@ __extension__ typedef __SIZE_TYPE__ size_t; extern void abort (void); extern void *memcpy (void *, const void *, size_t); +extern int memcmp (const void *, const void *, size_t); static int AI[4]; static int init_si[4] = { -30,-30,-50,-50 }; diff --git gcc/testsuite/gcc.dg/pr32176.c gcc/testsuite/gcc.dg/pr32176.c index 3017c34..e619f2f 100644 --- gcc/testsuite/gcc.dg/pr32176.c +++ gcc/testsuite/gcc.dg/pr32176.c @@ -2,7 +2,9 @@ /* { dg-do compile } */ /* { dg-options -O2 -fprefetch-loop-arrays -w } */ -/* { dg-options -O2 -fprefetch-loop-arrays -march=i686 -msse { target { { i?86-*-* x86_64-*-* } ia32 } } } */ +/* { dg-additional-options -march=i686 -msse { target { { i?86-*-* x86_64-*-* } ia32 } } } */ + +extern void _gfortran_abort (); void foo (void) { diff --git gcc/testsuite/gcc.dg/sync-2.c gcc/testsuite/gcc.dg/sync-2.c index
[PATCH 2/17] Introduction of __asan_loadN/__asan_storeN
I disabled __asan_loadN/__asan_storeN because 4.9's Asan runtime doesn't support them. In a later patch I re-enable these functions specifically for KAsan. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-30 Jakub Jelinek ja...@redhat.com * sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N, BUILT_IN_ASAN_REPORT_STORE_N): New. * asan.c (struct asan_mem_ref): Change access_size type to HOST_WIDE_INT. (asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call, update_mem_ref_hash_table): Likewise. (asan_mem_ref_hasher::hash): Hash in a HWI. (report_error_func): Change size_in_bytes argument to HWI. Use *_N builtins if size_in_bytes is larger than 16 or not power of two. (build_shadow_mem_access): New function. (build_check_stmt): Use it. Change size_in_bytes argument to HWI. Handle size_in_bytes not power of two or larger than 16. (instrument_derefs): Don't give up if size_in_bytes is not power of two or is larger than 16. diff --git a/gcc/asan.c b/gcc/asan.c index 08cc2c0..1bba680 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -251,8 +251,8 @@ struct asan_mem_ref /* The expression of the beginning of the memory region. */ tree start; - /* The size of the access (can be 1, 2, 4, 8, 16 for now). */ - char access_size; + /* The size of the access. */ + HOST_WIDE_INT access_size; }; static alloc_pool asan_mem_ref_alloc_pool; @@ -274,7 +274,7 @@ asan_mem_ref_get_alloc_pool () /* Initializes an instance of asan_mem_ref. */ static void -asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size) +asan_mem_ref_init (asan_mem_ref *ref, tree start, HOST_WIDE_INT access_size) { ref-start = start; ref-access_size = access_size; @@ -287,7 +287,7 @@ asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size) access to the referenced memory. 
*/ static asan_mem_ref* -asan_mem_ref_new (tree start, char access_size) +asan_mem_ref_new (tree start, HOST_WIDE_INT access_size) { asan_mem_ref *ref = (asan_mem_ref *) pool_alloc (asan_mem_ref_get_alloc_pool ()); @@ -334,7 +334,7 @@ inline hashval_t asan_mem_ref_hasher::hash (const asan_mem_ref *mem_ref) { hashval_t h = iterative_hash_expr (mem_ref-start, 0); - h = iterative_hash_hashval_t (h, mem_ref-access_size); + h = iterative_hash_host_wide_int (mem_ref-access_size, h); return h; } @@ -392,7 +392,7 @@ free_mem_ref_resources () /* Return true iff the memory reference REF has been instrumented. */ static bool -has_mem_ref_been_instrumented (tree ref, char access_size) +has_mem_ref_been_instrumented (tree ref, HOST_WIDE_INT access_size) { asan_mem_ref r; asan_mem_ref_init (r, ref, access_size); @@ -480,7 +480,7 @@ get_mem_refs_of_builtin_call (const gimple call, tree source0 = NULL_TREE, source1 = NULL_TREE, dest = NULL_TREE, len = NULL_TREE; bool is_store = true, got_reference_p = false; - char access_size = 1; + HOST_WIDE_INT access_size = 1; switch (DECL_FUNCTION_CODE (callee)) { @@ -842,7 +842,7 @@ has_stmt_been_instrumented_p (gimple stmt) /* Insert a memory reference into the hash table. */ static void -update_mem_ref_hash_table (tree ref, char access_size) +update_mem_ref_hash_table (tree ref, HOST_WIDE_INT access_size) { hash_table asan_mem_ref_hasher ht = get_mem_ref_hash_table (); @@ -1315,20 +1315,22 @@ asan_protect_global (tree decl) return true; } -/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}. - IS_STORE is either 1 (for a store) or 0 (for a load). - SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */ +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16,_n}. + IS_STORE is either 1 (for a store) or 0 (for a load). 
*/ static tree -report_error_func (bool is_store, int size_in_bytes) +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes) { - static enum built_in_function report[2][5] + static enum built_in_function report[2][6] = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2, BUILT_IN_ASAN_REPORT_LOAD4, BUILT_IN_ASAN_REPORT_LOAD8, - BUILT_IN_ASAN_REPORT_LOAD16 }, + BUILT_IN_ASAN_REPORT_LOAD16, BUILT_IN_ASAN_REPORT_LOAD_N }, { BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2, BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8, - BUILT_IN_ASAN_REPORT_STORE16 } }; + BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } }; + if ((size_in_bytes (size_in_bytes - 1)) != 0 + || size_in_bytes 16) +return builtin_decl_implicit (report[is_store][5]); return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]); } @@ -1450,6 +1452,47 @@ insert_if_then_before_iter (gimple cond, gsi_insert_after (cond_insert_point, cond, GSI_NEW_STMT); } +/* Build + (base_addr ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset (). */ + +static tree +build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location, + tree base_addr, tree shadow_ptr_type) +{ +
[PATCH 4/17] Outline instrumentation
4.9's Asan runtime library provides no support for these so I removed the tests. I've also changed default value of threshold to INT_MAX to completely disable outline instrumentation in userspace Asan. New asan-instrumentation-with-call-threshold parameter. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-16 Yury Gribov y.gri...@samsung.com * asan.c (check_func): New function. (maybe_create_ssa_name): Likewise. (build_check_stmt_with_calls): Likewise. (use_calls_p): Likewise. (report_error_func): Change interface. (build_check_stmt): Allow non-integer lengths; add support for new parameter. (asan_instrument): Likewise. (instrument_mem_region_access): Moved code to build_check_stmt. (instrument_derefs): Likewise. (instrument_strlen_call): Likewise. * cfgcleanup.c (old_insns_match_p): Add support for new functions. * doc/invoke.texi: Describe new parameter. * params.def: Define new parameter. * params.h: Likewise. * sanitizer.def: Describe new builtins. * c-c++-common/asan/instrument-with-calls-1.c: New test. * c-c++-common/asan/instrument-with-calls-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-1.c: Update test patterns. * c-c++-common/asan/no-redundant-instrumentation-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-4.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-5.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-6.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-7.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-8.c: Likewise. diff --git a/gcc/asan.c b/gcc/asan.c index 820d8ef..5c091d0 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -242,6 +242,19 @@ static GTY(()) tree shadow_ptr_types[2]; /* Decl for __asan_option_detect_stack_use_after_return. */ static GTY(()) tree asan_detect_stack_use_after_return; +/* Number of instrumentations in current function so far. 
*/ + +static int asan_num_accesses; + +/* Check whether we should replace inline instrumentation with calls. */ + +static inline bool +use_calls_p () +{ + return ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX + && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD; +} + /* Hashtable support for memory references used by gimple statements. */ @@ -1319,7 +1332,7 @@ asan_protect_global (tree decl) IS_STORE is either 1 (for a store) or 0 (for a load). */ static tree -report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes, bool slow_p) +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes, int *nargs) { static enum built_in_function report[2][6] = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2, @@ -1328,13 +1341,37 @@ report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes, bool slow_p) { BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2, BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8, BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } }; - if ((size_in_bytes & (size_in_bytes - 1)) != 0 - || size_in_bytes > 16 - || slow_p) -return builtin_decl_implicit (report[is_store][5]); + if (size_in_bytes == -1) +{ + *nargs = 2; + return builtin_decl_implicit (report[is_store][5]); +} + *nargs = 1; return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]); } +/* Construct a function tree for __asan_{load,store}{1,2,4,8,16,_n}. + IS_STORE is either 1 (for a store) or 0 (for a load). 
*/ + +static tree +check_func (bool is_store, int size_in_bytes, int *nargs) +{ + static enum built_in_function check[2][6] += { { BUILT_IN_ASAN_LOAD1, BUILT_IN_ASAN_LOAD2, + BUILT_IN_ASAN_LOAD4, BUILT_IN_ASAN_LOAD8, + BUILT_IN_ASAN_LOAD16, BUILT_IN_ASAN_LOADN }, + { BUILT_IN_ASAN_STORE1, BUILT_IN_ASAN_STORE2, + BUILT_IN_ASAN_STORE4, BUILT_IN_ASAN_STORE8, + BUILT_IN_ASAN_STORE16, BUILT_IN_ASAN_STOREN } }; + if (size_in_bytes == -1) +{ + *nargs = 2; + return builtin_decl_implicit (check[is_store][5]); +} + *nargs = 1; + return builtin_decl_implicit (check[is_store][exact_log2 (size_in_bytes)]); +} + /* Split the current basic block and create a condition statement insertion point right before or after the statement pointed to by ITER. Return an iterator to the point at which the caller might @@ -1494,6 +1531,76 @@ build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location, return gimple_assign_lhs (g); } +/* BASE can already be an SSA_NAME; in that case, do not create a + new SSA_NAME for it. */ + +static tree +maybe_create_ssa_name (location_t loc, tree base, gimple_stmt_iterator *iter, + bool before_p) +{ + if (TREE_CODE (base) == SSA_NAME) +return base; + gimple g += gimple_build_assign_with_ops (TREE_CODE (base), +make_ssa_name (TREE_TYPE (base), NULL), +base, NULL_TREE); + gimple_set_location (g, loc); + if (before_p) +gsi_insert_before
[PATCH 5/17] Fix bootstrap error
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-16 Yury Gribov y.gri...@samsung.com * asan.c (build_check_stmt): Fix maybe-uninitialized warning. diff --git a/gcc/asan.c b/gcc/asan.c index 5c091d0..3729178 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1635,6 +1635,13 @@ build_check_stmt (location_t location, tree base, tree len, gcc_assert (!(size_in_bytes > 0 && !non_zero_len_p)); + if (start_instrumented && end_instrumented) +{ + if (!before_p) +gsi_next (iter); + return; +} + if (len) len = unshare_expr (len); else @@ -1734,7 +1741,7 @@ build_check_stmt (location_t location, tree base, tree len, gsi_insert_after (&gsi, g, GSI_NEW_STMT); tree base_addr = gimple_assign_lhs (g); - tree t; + tree t = NULL_TREE; if (real_size_in_bytes >= 8) { tree shadow = build_shadow_mem_access (&gsi, location, base_addr,
[PATCH 7/17] Fix for PR 61547
Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-18 Yury Gribov y.gri...@samsung.com PR sanitizer/61547 * asan.c (instrument_strlen_call): Fixed instrumentation of trailing byte. * c-c++-common/asan/strlen-overflow-1.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index c838423..06177ac 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -2037,19 +2037,19 @@ instrument_strlen_call (gimple_stmt_iterator *iter) build_check_stmt (loc, gimple_assign_lhs (str_arg_ssa), NULL_TREE, 1, iter, /*non_zero_len_p*/true, /*before_p=*/true, - /*is_store=*/false, /*is_scalar_access*/false, /*align*/0); + /*is_store=*/false, /*is_scalar_access*/true, /*align*/0); - gimple stmt = -gimple_build_assign_with_ops (PLUS_EXPR, - make_ssa_name (TREE_TYPE (len), NULL), - len, - build_int_cst (TREE_TYPE (len), 1)); - gimple_set_location (stmt, loc); - gsi_insert_after (iter, stmt, GSI_NEW_STMT); + gimple g = +gimple_build_assign_with_ops (POINTER_PLUS_EXPR, + make_ssa_name (cptr_type, NULL), + gimple_assign_lhs (str_arg_ssa), + len); + gimple_set_location (g, loc); + gsi_insert_after (iter, g, GSI_NEW_STMT); - build_check_stmt (loc, gimple_assign_lhs (stmt), len, 1, iter, + build_check_stmt (loc, gimple_assign_lhs (g), NULL_TREE, 1, iter, /*non_zero_len_p*/true, /*before_p=*/false, - /*is_store=*/false, /*is_scalar_access*/false, /*align*/0); + /*is_store=*/false, /*is_scalar_access*/true, /*align*/0); return true; } diff --git a/gcc/testsuite/c-c++-common/asan/strlen-overflow-1.c b/gcc/testsuite/c-c++-common/asan/strlen-overflow-1.c new file mode 100644 index 000..426c8fe --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/strlen-overflow-1.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-skip-if { *-*-* } { -flto } { } } */ +/* { dg-shouldfail asan } */ + +#ifndef __cplusplus +#define bool unsigned char +#endif + +#include sanitizer/asan_interface.h + +char a[2] = 
0; + +#ifdef __cplusplus +extern C +#endif + +__attribute__((no_sanitize_address, noinline)) __SIZE_TYPE__ +strlen (const char *p) { + + __SIZE_TYPE__ n = 0; + for (; *p; ++n, ++p); + return n; +} + +int main () { + char *p = a[0]; + asm ( : +r(p)); + __asan_poison_memory_region ((char *)a[1], 1); + return __builtin_strlen (a); +} + +/* { dg-output READ of size 1 at 0x\[0-9a-f\]+ thread T0.*(\n|\r\n|\r) } */ +/* { dg-output #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*strlen-overflow-1.c:29|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r) } */
[PATCH 6/17] Fix for PR 61530
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-18 Yury Gribov y.gri...@samsung.com PR sanitizer/61530 * asan.c (build_check_stmt): Add condition. * c-c++-common/asan/pr61530.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index 3729178..c838423 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1653,6 +1653,7 @@ build_check_stmt (location_t location, tree base, tree len, if (size_in_bytes > 1) { if ((size_in_bytes & (size_in_bytes - 1)) != 0 + || !is_scalar_access || size_in_bytes > 16) size_in_bytes = -1; else if (align && align < size_in_bytes * BITS_PER_UNIT) diff --git a/gcc/testsuite/c-c++-common/asan/pr61530.c b/gcc/testsuite/c-c++-common/asan/pr61530.c new file mode 100644 index 000..e306a71 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr61530.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-shouldfail "asan" } */ + +__attribute__((noinline,noclone)) void +foo (char *a, char *b) { + a[0] = b[0] = 0; + __builtin_memcpy(a, b, 4); +} + +int +main () { + char a, b; + foo (&a, &b); + return 0; +} + +/* { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow" } */
[PATCH 8/17] Optimization of strlen instrumentation
Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). Do not instrument first byte in strlen if already instrumented. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-24 Max Ostapenko m.ostape...@partner.samsung.com * asan.c (instrument_strlen_call): Do not instrument first byte in strlen if already instrumented. * c-c++-common/asan/no-redundant-instrumentation-9.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index 06177ac..0789ad3 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -2026,6 +2026,7 @@ instrument_strlen_call (gimple_stmt_iterator *iter) location_t loc = gimple_location (call); tree str_arg = gimple_call_arg (call, 0); + bool start_instrumented = has_mem_ref_been_instrumented (str_arg, 1); tree cptr_type = build_pointer_type (char_type_node); gimple str_arg_ssa = @@ -2037,7 +2038,8 @@ instrument_strlen_call (gimple_stmt_iterator *iter) build_check_stmt (loc, gimple_assign_lhs (str_arg_ssa), NULL_TREE, 1, iter, /*non_zero_len_p*/true, /*before_p=*/true, - /*is_store=*/false, /*is_scalar_access*/true, /*align*/0); + /*is_store=*/false, /*is_scalar_access*/true, /*align*/0, + start_instrumented, start_instrumented); gimple g = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, diff --git a/gcc/testsuite/c-c++-common/asan/no-redundant-instrumentation-9.c b/gcc/testsuite/c-c++-common/asan/no-redundant-instrumentation-9.c new file mode 100644 index 000..c6575ad --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/no-redundant-instrumentation-9.c @@ -0,0 +1,13 @@ +/* { dg-options -fdump-tree-asan0 } */ +/* { dg-do compile } */ +/* { dg-skip-if { *-*-* } { * } { -O0 } } */ + +__SIZE_TYPE__ +f (char *a) +{ + a[0] = '1'; + return __builtin_strlen (a); +} + +/* { dg-final { scan-tree-dump-times __asan_report_load1 1 asan0 } } */ +/* { dg-final { cleanup-tree-dump asan0 } } */
[PATCH 9/17] Initial KAsan support
The patch was slightly updated to take care of missing UBSan work (SANITIZE_FLOAT_DIVIDE, SANITIZE_FLOAT_CAST, SANITIZE_BOUNDS). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-07-31 Yury Gribov y.gri...@samsung.com * doc/cpp.texi (__SANITIZE_ADDRESS__): Updated description. * doc/invoke.texi (-fsanitize=kernel-address): Describe new option. * flag-types.h (SANITIZE_USER_ADDRESS, SANITIZE_KERNEL_ADDRESS): New enums. * gcc.c (sanitize_spec_function): Support new option. (SANITIZER_SPEC): Remove now redundant check. * opts.c (common_handle_option): Support new option. (finish_options): Check for incompatibilities. * toplev.c (process_options): Split userspace-specific checks. diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi index aaed739..0a6e50c 100644 --- a/gcc/doc/cpp.texi +++ b/gcc/doc/cpp.texi @@ -2354,8 +2354,8 @@ This macro is defined, with value 3, when @option{-fstack-protector-strong} is in use. @item __SANITIZE_ADDRESS__ -This macro is defined, with value 1, when @option{-fsanitize=address} is -in use. +This macro is defined, with value 1, when @option{-fsanitize=address} +or @option{-fsanitize=kernel-address} are in use. @item __TIMESTAMP__ This macro expands to a string constant that describes the date and time diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4012f08..179f273 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -5286,6 +5286,11 @@ more details. The run-time behavior can be influenced using the @url{https://code.google.com/p/address-sanitizer/wiki/Flags#Run-time_flags} for a list of supported options. +@item -fsanitize=kernel-address +@opindex fsanitize=kernel-address +Enable AddressSanitizer for Linux kernel. +See @uref{http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel} for more details. + @item -fsanitize=thread @opindex fsanitize=thread Enable ThreadSanitizer, a fast data race detector. 
diff --git a/gcc/flag-types.h b/gcc/flag-types.h index ec16faa..1dd1b3e 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -204,20 +204,22 @@ enum vect_cost_model { enum sanitize_code { /* AddressSanitizer. */ SANITIZE_ADDRESS = 1 0, + SANITIZE_USER_ADDRESS = 1 1, + SANITIZE_KERNEL_ADDRESS = 1 2, /* ThreadSanitizer. */ - SANITIZE_THREAD = 1 1, + SANITIZE_THREAD = 1 3, /* LeakSanitizer. */ - SANITIZE_LEAK = 1 2, + SANITIZE_LEAK = 1 4, /* UndefinedBehaviorSanitizer. */ - SANITIZE_SHIFT = 1 3, - SANITIZE_DIVIDE = 1 4, - SANITIZE_UNREACHABLE = 1 5, - SANITIZE_VLA = 1 6, - SANITIZE_NULL = 1 7, - SANITIZE_RETURN = 1 8, - SANITIZE_SI_OVERFLOW = 1 9, - SANITIZE_BOOL = 1 10, - SANITIZE_ENUM = 1 11, + SANITIZE_SHIFT = 1 5, + SANITIZE_DIVIDE = 1 6, + SANITIZE_UNREACHABLE = 1 7, + SANITIZE_VLA = 1 8, + SANITIZE_NULL = 1 9, + SANITIZE_RETURN = 1 10, + SANITIZE_SI_OVERFLOW = 1 11, + SANITIZE_BOOL = 1 12, + SANITIZE_ENUM = 1 13, SANITIZE_UNDEFINED = SANITIZE_SHIFT | SANITIZE_DIVIDE | SANITIZE_UNREACHABLE | SANITIZE_VLA | SANITIZE_NULL | SANITIZE_RETURN | SANITIZE_SI_OVERFLOW | SANITIZE_BOOL | SANITIZE_ENUM diff --git a/gcc/gcc.c b/gcc/gcc.c index 9c4c40c..1034de8 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -734,8 +734,7 @@ proper position among the other output files. */ #ifndef SANITIZER_SPEC #define SANITIZER_SPEC \ %{!nostdlib:%{!nodefaultlibs:%{%:sanitize(address): LIBASAN_SPEC \ -%{static:%ecannot specify -static with -fsanitize=address}\ -%{%:sanitize(thread):%e-fsanitize=address is incompatible with -fsanitize=thread}}\ +%{static:%ecannot specify -static with -fsanitize=address}}\ %{%:sanitize(thread): LIBTSAN_SPEC \ %{!pie:%{!shared:%e-fsanitize=thread linking must be done with -pie or -shared}}}\ %{%:sanitize(undefined): LIBUBSAN_SPEC }\ @@ -8173,7 +8172,9 @@ sanitize_spec_function (int argc, const char **argv) return NULL; if (strcmp (argv[0], address) == 0) -return (flag_sanitize SANITIZE_ADDRESS) ? : NULL; +return (flag_sanitize SANITIZE_USER_ADDRESS) ? 
: NULL; + if (strcmp (argv[0], kernel-address) == 0) +return (flag_sanitize SANITIZE_KERNEL_ADDRESS) ? : NULL; if (strcmp (argv[0], thread) == 0) return (flag_sanitize SANITIZE_THREAD) ? : NULL; if (strcmp (argv[0], undefined) == 0) diff --git a/gcc/opts.c b/gcc/opts.c index bbd6b9c..fbdebd7 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -868,6 +868,20 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, /* The -gsplit-dwarf option requires -gpubnames. */ if (opts-x_dwarf_split_debug_info) opts-x_debug_generate_pub_sections = 1; + + /* Userspace and kernel ASan conflict with each other and with TSan. */ + + if ((flag_sanitize SANITIZE_USER_ADDRESS) + (flag_sanitize SANITIZE_KERNEL_ADDRESS)) +error_at (loc, + -fsanitize=address is incompatible with + -fsanitize=kernel-address); + + if ((flag_sanitize
[PATCH 11/17] Move Asan instrumentation to sanopt pass
The patch was slightly updated to take care of missing UBSan work (UBSAN_BOUNDS). Move inlining of Asan memory checks to sanopt pass. Change asan-instrumentation-with-call-threshold to more closely match LLVM. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-11 Yury Gribov y.gri...@samsung.com * asan.c (asan_check_flags): New enum. (build_check_stmt_with_calls): Removed function. (build_check_stmt): Split inlining logic to asan_expand_check_ifn. (instrument_derefs): Rename parameter. (instrument_mem_region_access): Rename parameter. (instrument_strlen_call): Likewise. (asan_expand_check_ifn): New function. (asan_instrument): Remove old code. (pass_sanopt::execute): Change handling of asan-instrumentation-with-call-threshold. (asan_clear_shadow): Fix formatting. (asan_function_start): Likewise. (asan_emit_stack_protection): Likewise. * doc/invoke.texi (asan-instrumentation-with-call-threshold): Update description. * internal-fn.c (expand_ASAN_CHECK): New function. * internal-fn.def (ASAN_CHECK): New internal function. * params.def (PARAM_ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD): Update description. (PARAM_ASAN_USE_AFTER_RETURN): Likewise. * tree.c: Small comment fix. * c-c++-common/asan/inc.c: Update test. * c-c++-common/asan/instrument-with-calls-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-1.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-3.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-4.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-5.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-6.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-7.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-8.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-9.c: Likewise. 
diff --git a/gcc/asan.c b/gcc/asan.c index 0789ad3..f55b024 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -242,18 +242,16 @@ static GTY(()) tree shadow_ptr_types[2]; /* Decl for __asan_option_detect_stack_use_after_return. */ static GTY(()) tree asan_detect_stack_use_after_return; -/* Number of instrumentations in current function so far. */ - -static int asan_num_accesses; - -/* Check whether we should replace inline instrumentation with calls. */ - -static inline bool -use_calls_p () -{ - return ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD INT_MAX - asan_num_accesses = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD; -} +/* Various flags for Asan builtins. */ +enum asan_check_flags +{ + ASAN_CHECK_STORE = 1 0, + ASAN_CHECK_SCALAR_ACCESS = 1 1, + ASAN_CHECK_NON_ZERO_LEN = 1 2, + ASAN_CHECK_START_INSTRUMENTED = 1 3, + ASAN_CHECK_END_INSTRUMENTED = 1 4, + ASAN_CHECK_LAST +}; /* Hashtable support for memory references used by gimple statements. */ @@ -942,7 +940,7 @@ asan_clear_shadow (rtx shadow_mem, HOST_WIDE_INT len) emit_move_insn (shadow_mem, const0_rtx); tmp = expand_simple_binop (Pmode, PLUS, addr, gen_int_mode (4, Pmode), addr, - true, OPTAB_LIB_WIDEN); + true, OPTAB_LIB_WIDEN); if (tmp != addr) emit_move_insn (addr, tmp); emit_cmp_and_jump_insns (addr, end, LT, NULL_RTX, Pmode, true, top_label); @@ -957,7 +955,7 @@ asan_function_start (void) section *fnsec = function_section (current_function_decl); switch_to_section (fnsec); ASM_OUTPUT_DEBUG_LABEL (asm_out_file, LASANPC, - current_function_funcdef_no); + current_function_funcdef_no); } /* Insert code to protect stack vars. The prologue sequence should be emitted @@ -1022,7 +1020,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, { use_after_return_class = floor_log2 (asan_frame_size - 1) - 5; /* __asan_stack_malloc_N guarantees alignment - N 6 ? (64 N) : 4096 bytes. */ + N 6 ? (64 N) : 4096 bytes. */ if (alignb (use_after_return_class 6 ? 
(64U use_after_return_class) : 4096U)) use_after_return_class = -1; @@ -1095,7 +1093,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, ASM_GENERATE_INTERNAL_LABEL (buf, LASANPC, current_function_funcdef_no); id = get_identifier (buf); decl = build_decl (DECL_SOURCE_LOCATION (current_function_decl), -VAR_DECL, id, char_type_node); + VAR_DECL, id, char_type_node); SET_DECL_ASSEMBLER_NAME (decl, id); TREE_ADDRESSABLE (decl) = 1; TREE_READONLY (decl) = 1; @@ -1552,55 +1550,6 @@ maybe_create_ssa_name (location_t loc, tree base, gimple_stmt_iterator *iter, return gimple_assign_lhs (g); } -/* Instrument the memory access instruction using callbacks. - Parameters are similar to BUILD_CHECK_STMT. */ - -static void -build_check_stmt_with_calls (location_t loc, tree base, tree len, - HOST_WIDE_INT size_in_bytes, gimple_stmt_iterator *iter, - bool
[PATCH 10/17] Support fnspec for internal fns
The patch was slightly updated to take care of missing UBSan work (UBSAN_BOUNDS). Added fnspec to internal functions. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-11 Yury Gribov y.gri...@samsung.com * gimple.c (gimple_call_fnspec): Support internal functions. (gimple_call_return_flags): Use const. * Makefile.in (GTFILES): Add internal-fn.h to list of GC files. * internal-fn.def: Add fnspec information. * internal-fn.h (internal_fn_fnspec): New function. (init_internal_fns): Declare new function. * internal-fn.c (internal_fn_fnspec_array): New global variable. (init_internal_fns): New function. * tree-core.h: Update macro call. * tree.c (build_common_builtin_nodes): Initialize internal fns. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5dd1c25..b47733c 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2282,7 +2282,9 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/vtable-verify.c \ $(srcdir)/asan.c \ $(srcdir)/ubsan.c \ - $(srcdir)/tsan.c $(srcdir)/ipa-devirt.c \ + $(srcdir)/tsan.c \ + $(srcdir)/ipa-devirt.c \ + $(srcdir)/internal-fn.h \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, diff --git a/gcc/gimple.c b/gcc/gimple.c index 2a278e4..30d1653 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -1329,11 +1329,14 @@ gimple_call_flags (const_gimple stmt) /* Return the fn spec string for call STMT. 
*/ -static tree +static const_tree gimple_call_fnspec (const_gimple stmt) { tree type, attr; + if (gimple_call_internal_p (stmt)) +return internal_fn_fnspec (gimple_call_internal_fn (stmt)); + type = gimple_call_fntype (stmt); if (!type) return NULL_TREE; @@ -1350,7 +1353,7 @@ gimple_call_fnspec (const_gimple stmt) int gimple_call_arg_flags (const_gimple stmt, unsigned arg) { - tree attr = gimple_call_fnspec (stmt); + const_tree attr = gimple_call_fnspec (stmt); if (!attr || 1 + arg = (unsigned) TREE_STRING_LENGTH (attr)) return 0; @@ -1384,7 +1387,7 @@ gimple_call_arg_flags (const_gimple stmt, unsigned arg) int gimple_call_return_flags (const_gimple stmt) { - tree attr; + const_tree attr; if (gimple_call_flags (stmt) ECF_MALLOC) return ERF_NOALIAS; diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 1062ea8..5b881f1 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -40,7 +40,7 @@ along with GCC; see the file COPYING3. If not see /* The names of each internal function, indexed by function number. */ const char *const internal_fn_name_array[] = { -#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE, +#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) #CODE, #include internal-fn.def #undef DEF_INTERNAL_FN invalid-fn @@ -48,12 +48,26 @@ const char *const internal_fn_name_array[] = { /* The ECF_* flags of each internal function, indexed by function number. */ const int internal_fn_flags_array[] = { -#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS, +#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) FLAGS, #include internal-fn.def #undef DEF_INTERNAL_FN 0 }; +/* Fnspec of each internal function, indexed by function number. */ +const_tree internal_fn_fnspec_array[IFN_LAST + 1]; + +void +init_internal_fns () +{ +#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ + if (FNSPEC) internal_fn_fnspec_array[IFN_##CODE] = \ +build_string ((int) sizeof (FNSPEC) + 1, FNSPEC ? 
FNSPEC : ); +#include internal-fn.def +#undef DEF_INTERNAL_FN + internal_fn_fnspec_array[IFN_LAST] = 0; +} + /* ARRAY_TYPE is an array of vector modes. Return the associated insn for load-lanes-style optab OPTAB. The insn must exist. */ @@ -891,7 +905,7 @@ expand_BUILTIN_EXPECT (gimple stmt) where STMT is the statement that performs the call. */ static void (*const internal_fn_expanders[]) (gimple) = { -#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE, +#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) expand_##CODE, #include internal-fn.def #undef DEF_INTERNAL_FN 0 diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 31dc4c9..f60a9b0 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -28,28 +28,29 @@ along with GCC; see the file COPYING3. If not see Each entry in this file has the form: - DEF_INTERNAL_FN (NAME, FLAGS) + DEF_INTERNAL_FN (NAME, FLAGS, FNSPEC) - where NAME is the name of the function and FLAGS is a set of - ECF_* flags. Each entry must have a corresponding expander - of the form: + where NAME is the name of the function, FLAGS is a set of + ECF_* flags and FNSPEC is a string describing functions fnspec. + + Each entry must have a corresponding expander of the form: void expand_NAME (gimple stmt) where STMT is the statement that performs the call. */ -DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF) -DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
[PATCH 12/17] Fix off-by-one
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-12 Yury Gribov y.gri...@samsung.com * internal-fn.c (init_internal_fns): Fix off-by-one. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 0749dce..d64e20d 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -62,7 +62,7 @@ init_internal_fns () { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ if (FNSPEC) internal_fn_fnspec_array[IFN_##CODE] = \ -build_string ((int) sizeof (FNSPEC) + 1, FNSPEC ? FNSPEC : ); +build_string ((int) sizeof (FNSPEC), FNSPEC ? FNSPEC : ); #include internal-fn.def #undef DEF_INTERNAL_FN internal_fn_fnspec_array[IFN_LAST] = 0;
Re: [PATCH 1/17] Install asan_interface.h
On Thu, Oct 16, 2014 at 12:35:51PM +0400, Yury Gribov wrote: 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-14 Yury Gribov y.gri...@samsung.com PR sanitizer/61100 * Makefile.am (nodist_saninclude_HEADERS): Install public headers. * Makefile.in: Regenerate. * c-c++-common/asan/asan-interface-1.c: New test. * lib/asan-dg.exp (asan_include_flags): New function. (asan_init): Call asan_include_flags to obtain path to sanitizer headers. --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/asan-interface-1.c @@ -0,0 +1,17 @@ +/* Check that interface headers work. */ + +/* { dg-do run { target { *-*-linux* } } } */ + +#ifndef __cplusplus +#define bool unsigned char +#endif + Please #include <stdbool.h> instead. +#include sanitizer/asan_interface.h + +int main() { + char tmp; + if (__asan_address_is_poisoned((volatile char *)tmp + 1)) +return 0; + return 1; +} + Ok with that change, but please wait for the whole series to be approved (applies to all patches from the series). Jakub
[PATCH 13/17] Fix for PR 62089
Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-18 Yury Gribov y.gri...@samsung.com PR sanitizer/62089 * asan.c (instrument_derefs): Fix bitfield check. * c-c++-common/asan/pr62089.c: New test. * c-c++-common/asan/bitfield-1.c: New test. * c-c++-common/asan/bitfield-2.c: New test. * c-c++-common/asan/bitfield-3.c: New test. * c-c++-common/asan/bitfield-4.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index f55b024..e445470 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1688,21 +1688,19 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, int volatilep = 0, unsignedp = 0; tree inner = get_inner_reference (t, bitsize, bitpos, offset, mode, unsignedp, volatilep, false); - if (((size_in_bytes (size_in_bytes - 1)) == 0 -(bitpos % (size_in_bytes * BITS_PER_UNIT))) - || bitsize != size_in_bytes * BITS_PER_UNIT) + + if (TREE_CODE (t) == COMPONENT_REF + DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)) != NULL_TREE) { - if (TREE_CODE (t) == COMPONENT_REF - DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)) != NULL_TREE) - { - tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)); - instrument_derefs (iter, build3 (COMPONENT_REF, TREE_TYPE (repr), - TREE_OPERAND (t, 0), repr, - NULL_TREE), location, is_store); - } + tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)); + instrument_derefs (iter, build3 (COMPONENT_REF, TREE_TYPE (repr), + TREE_OPERAND (t, 0), repr, + NULL_TREE), location, is_store); return; } - if (bitpos % BITS_PER_UNIT) + + if (bitpos % BITS_PER_UNIT + || bitsize != size_in_bytes * BITS_PER_UNIT) return; if (TREE_CODE (inner) == VAR_DECL diff --git a/gcc/testsuite/c-c++-common/asan/bitfield-1.c b/gcc/testsuite/c-c++-common/asan/bitfield-1.c new file mode 100644 index 000..b3f300c --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/bitfield-1.c @@ -0,0 +1,25 @@ +/* Check that Asan correctly 
instruments bitfields with non-round size. */ + +/* { dg-do run } */ +/* { dg-shouldfail asan } */ + +struct A +{ + char base; + int : 4; + long x : 7; +}; + +int __attribute__ ((noinline, noclone)) +f (void *p) { + return ((struct A *)p)-x; +} + +int +main () +{ + char a = 0; + return f (a); +} + +/* { dg-output ERROR: AddressSanitizer: stack-buffer-overflow } */ diff --git a/gcc/testsuite/c-c++-common/asan/bitfield-2.c b/gcc/testsuite/c-c++-common/asan/bitfield-2.c new file mode 100644 index 000..8ab0f80 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/bitfield-2.c @@ -0,0 +1,25 @@ +/* Check that Asan correctly instruments bitfields with non-round offset. */ + +/* { dg-do run } */ +/* { dg-shouldfail asan } */ + +struct A +{ + char base; + int : 7; + int x : 8; +}; + +int __attribute__ ((noinline, noclone)) +f (void *p) { + return ((struct A *)p)-x; +} + +int +main () +{ + char a = 0; + return f (a); +} + +/* { dg-output ERROR: AddressSanitizer: stack-buffer-overflow } */ diff --git a/gcc/testsuite/c-c++-common/asan/bitfield-3.c b/gcc/testsuite/c-c++-common/asan/bitfield-3.c new file mode 100644 index 000..c590778 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/bitfield-3.c @@ -0,0 +1,25 @@ +/* Check that Asan correctly instruments bitfields with round offset. */ + +/* { dg-do run } */ +/* { dg-shouldfail asan } */ + +struct A +{ + char base; + int : 8; + int x : 8; +}; + +int __attribute__ ((noinline, noclone)) +f (void *p) { + return ((struct A *)p)-x; +} + +int +main () +{ + char a = 0; + return f (a); +} + +/* { dg-output ERROR: AddressSanitizer: stack-buffer-overflow } */ diff --git a/gcc/testsuite/c-c++-common/asan/bitfield-4.c b/gcc/testsuite/c-c++-common/asan/bitfield-4.c new file mode 100644 index 000..94de9a4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/bitfield-4.c @@ -0,0 +1,25 @@ +/* Check that Asan correctly instruments bitfields with round offset. 
*/ + +/* { dg-do run } */ +/* { dg-shouldfail asan } */ + +struct A +{ + char base; + int : 0; + int x : 8; +}; + +int __attribute__ ((noinline, noclone)) +f (void *p) { + return ((struct A *)p)-x; +} + +int +main () +{ + char a = 0; + return f (a); +} + +/* { dg-output ERROR: AddressSanitizer: stack-buffer-overflow } */ diff --git a/gcc/testsuite/c-c++-common/asan/pr62089.c b/gcc/testsuite/c-c++-common/asan/pr62089.c new file mode 100644 index 000..6c25ea4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr62089.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-shouldfail asan } */ + +#ifndef __cplusplus +#define bool unsigned char +#endif + +#include sanitizer/asan_interface.h + +struct vfsmount {}; +struct dentry {}; + +struct path { + struct vfsmount *mnt; + struct dentry *dentry; +}; + +struct fs_struct { + int users; + int lock; + int seq; + int umask; + int in_exec; +
[PATCH 14/17] Fix test on 32-bit platforms
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-28 Yury Gribov y.gri...@samsung.com * c-c++-common/asan/pr62089.c: Fix test on 32-bit platforms. diff --git a/gcc/testsuite/c-c++-common/asan/pr62089.c b/gcc/testsuite/c-c++-common/asan/pr62089.c index 6c25ea4..48167b1 100644 --- a/gcc/testsuite/c-c++-common/asan/pr62089.c +++ b/gcc/testsuite/c-c++-common/asan/pr62089.c @@ -38,4 +38,4 @@ main () { return 0; } -/* { dg-output ERROR: AddressSanitizer: use-after-poison } */ +/* { dg-output ERROR: AddressSanitizer:\[^\n\r]*on address\[^\n\r]* } */
[PATCH 15/17] Fix for PR 61897 and PR 62140
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-09-01 Yury Gribov y.gri...@samsung.com PR sanitizer/61897 PR sanitizer/62140 * asan.c (asan_mem_ref_get_end): Handle non-ptroff_t lengths. (build_check_stmt): Likewise. (instrument_strlen_call): Likewise. (asan_expand_check_ifn): Likewise and fix types. (maybe_cast_to_ptrmode): New function. * c-c++-common/asan/pr62140-1.c: New test. * c-c++-common/asan/pr62140-2.c: New test. diff --git a/gcc/asan.c b/gcc/asan.c index e445470..5c6d1c5 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -316,6 +316,9 @@ asan_mem_ref_get_end (tree start, tree len) if (len == NULL_TREE || integer_zerop (len)) return start; + if (!ptrofftype_p (len)) +len = convert_to_ptrofftype (len); + return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (start), start, len); } @@ -1550,6 +1553,27 @@ maybe_create_ssa_name (location_t loc, tree base, gimple_stmt_iterator *iter, return gimple_assign_lhs (g); } +/* LEN can already have necessary size and precision; + in that case, do not create a new variable. */ + +tree +maybe_cast_to_ptrmode (location_t loc, tree len, gimple_stmt_iterator *iter, + bool before_p) +{ + if (ptrofftype_p (len)) +return len; + gimple g += gimple_build_assign_with_ops (NOP_EXPR, +make_ssa_name (pointer_sized_int_node, NULL), +len, NULL); + gimple_set_location (g, loc); + if (before_p) +gsi_insert_before (iter, g, GSI_SAME_STMT); + else +gsi_insert_after (iter, g, GSI_NEW_STMT); + return gimple_assign_lhs (g); +} + /* Instrument the memory access instruction BASE. Insert new statements before or after ITER. 
@@ -1595,7 +1619,10 @@ build_check_stmt (location_t loc, tree base, tree len, base = maybe_create_ssa_name (loc, base, gsi, before_p); if (len) -len = unshare_expr (len); +{ + len = unshare_expr (len); + len = maybe_cast_to_ptrmode (loc, len, iter, before_p); +} else { gcc_assert (size_in_bytes != -1); @@ -1802,6 +1829,7 @@ instrument_mem_region_access (tree base, tree len, static bool instrument_strlen_call (gimple_stmt_iterator *iter) { + gimple g; gimple call = gsi_stmt (*iter); gcc_assert (is_gimple_call (call)); @@ -1810,6 +1838,8 @@ instrument_strlen_call (gimple_stmt_iterator *iter) DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL DECL_FUNCTION_CODE (callee) == BUILT_IN_STRLEN); + location_t loc = gimple_location (call); + tree len = gimple_call_lhs (call); if (len == NULL) /* Some passes might clear the return value of the strlen call; @@ -1818,28 +1848,28 @@ instrument_strlen_call (gimple_stmt_iterator *iter) return false; gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (len))); - location_t loc = gimple_location (call); + len = maybe_cast_to_ptrmode (loc, len, iter, /*before_p*/false); + tree str_arg = gimple_call_arg (call, 0); bool start_instrumented = has_mem_ref_been_instrumented (str_arg, 1); tree cptr_type = build_pointer_type (char_type_node); - gimple str_arg_ssa = -gimple_build_assign_with_ops (NOP_EXPR, - make_ssa_name (cptr_type, NULL), - str_arg, NULL); - gimple_set_location (str_arg_ssa, loc); - gsi_insert_before (iter, str_arg_ssa, GSI_SAME_STMT); - - build_check_stmt (loc, gimple_assign_lhs (str_arg_ssa), NULL_TREE, 1, iter, + g = gimple_build_assign_with_ops (NOP_EXPR, +make_ssa_name (cptr_type, NULL), +str_arg, NULL); + gimple_set_location (g, loc); + gsi_insert_before (iter, g, GSI_SAME_STMT); + str_arg = gimple_assign_lhs (g); + + build_check_stmt (loc, str_arg, NULL_TREE, 1, iter, /*is_non_zero_len*/true, /*before_p=*/true, /*is_store=*/false, /*is_scalar_access*/true, /*align*/0, start_instrumented, start_instrumented); - gimple g = 
-gimple_build_assign_with_ops (POINTER_PLUS_EXPR, - make_ssa_name (cptr_type, NULL), - gimple_assign_lhs (str_arg_ssa), - len); + g = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, +make_ssa_name (cptr_type, NULL), +str_arg, +len); gimple_set_location (g, loc); gsi_insert_after (iter, g, GSI_NEW_STMT); @@ -2469,9 +2499,6 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) HOST_WIDE_INT real_size_in_bytes = size_in_bytes == -1 ? 1 : size_in_bytes; - tree uintptr_type -= build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (base)), 1); - tree shadow_ptr_type = shadow_ptr_types[real_size_in_bytes == 16 ? 1 : 0]; tree shadow_type = TREE_TYPE (shadow_ptr_type); @@ -2565,14 +2592,14 @@ asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) if (size_in_bytes == -1 !end_instrumented) { g = gimple_build_assign_with_ops (MINUS_EXPR, - make_ssa_name (uintptr_type, NULL), + make_ssa_name (pointer_sized_int_node, NULL), len, - build_int_cst (uintptr_type, 1)); +
[PATCH 16/17] Disable __asan_init calls for KASan
Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-10-03 Yury Gribov y.gri...@samsung.com * asan.c (asan_finish_file): Disable __asan_init calls for KASan; don't emit empty ctors. diff --git a/gcc/asan.c b/gcc/asan.c index 5c6d1c5..b8fbed3 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -2384,8 +2384,11 @@ asan_finish_file (void) nor after .LASAN* array. */ flag_sanitize = ~SANITIZE_ADDRESS; - tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT); - append_to_statement_list (build_call_expr (fn, 0), asan_ctor_statements); + if (flag_sanitize SANITIZE_USER_ADDRESS) +{ + tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT); + append_to_statement_list (build_call_expr (fn, 0), asan_ctor_statements); +} FOR_EACH_DEFINED_VARIABLE (vnode) if (TREE_ASM_WRITTEN (vnode-decl) asan_protect_global (vnode-decl)) @@ -2422,7 +2425,7 @@ asan_finish_file (void) DECL_INITIAL (var) = ctor; varpool_assemble_decl (varpool_node_for_decl (var)); - fn = builtin_decl_implicit (BUILT_IN_ASAN_REGISTER_GLOBALS); + tree fn = builtin_decl_implicit (BUILT_IN_ASAN_REGISTER_GLOBALS); tree gcount_tree = build_int_cst (pointer_sized_int_node, gcount); append_to_statement_list (build_call_expr (fn, 2, build_fold_addr_expr (var), @@ -2437,8 +2440,9 @@ asan_finish_file (void) cgraph_build_static_cdtor ('D', dtor_statements, MAX_RESERVED_INIT_PRIORITY - 1); } - cgraph_build_static_cdtor ('I', asan_ctor_statements, - MAX_RESERVED_INIT_PRIORITY - 1); + if (asan_ctor_statements) +cgraph_build_static_cdtor ('I', asan_ctor_statements, + MAX_RESERVED_INIT_PRIORITY - 1); flag_sanitize |= SANITIZE_ADDRESS; }
[PATCH 17/17] Enable __asan_loadN/__asan_storeN for KASan
Not a backport, specifically enables said instructions in KASan. Instrument unaligned objects in KASan. 2014-10-15 Yury Gribov y.gri...@samsung.com * asan.c (instrument_derefs): Enable unaligned path for KASan. diff --git a/gcc/asan.c b/gcc/asan.c index b8fbed3..db4e3a0 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1705,8 +1705,11 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, } size_in_bytes = int_size_in_bytes (type); - if ((size_in_bytes (size_in_bytes - 1)) != 0 - || (unsigned HOST_WIDE_INT) size_in_bytes - 1 = 16) + if (size_in_bytes = 0) +return; + if ((flag_sanitize SANITIZE_USER_ADDRESS) != 0 + ((size_in_bytes (size_in_bytes - 1)) != 0 + || (unsigned HOST_WIDE_INT) size_in_bytes - 1 = 16)) return; HOST_WIDE_INT bitsize, bitpos;
Re: [PATCH 2/17] Introduction of __asan_loadN/__asan_storeN
On Thu, Oct 16, 2014 at 12:36:15PM +0400, Yury Gribov wrote: I disabled __asan_loadN/__asan_storeN because 4.9's Asan runtime doesn't support them. In a later patch I re-enable these functions specifically for KAsan. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-30 Jakub Jelinek ja...@redhat.com * sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N, BUILT_IN_ASAN_REPORT_STORE_N): New. * asan.c (struct asan_mem_ref): Change access_size type to HOST_WIDE_INT. (asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call, update_mem_ref_hash_table): Likewise. (asan_mem_ref_hasher::hash): Hash in a HWI. (report_error_func): Change size_in_bytes argument to HWI. Use *_N builtins if size_in_bytes is larger than 16 or not power of two. (build_shadow_mem_access): New function. (build_check_stmt): Use it. Change size_in_bytes argument to HWI. Handle size_in_bytes not power of two or larger than 16. (instrument_derefs): Don't give up if size_in_bytes is not power of two or is larger than 16. Ok. Jakub
Re: [PATCH 3/17] Instrumentation of unaligned types
On Thu, Oct 16, 2014 at 12:36:37PM +0400, Yury Gribov wrote: Further work on __asan_loadN/__asan_storeN. I removed the tests (misalign-1.c, misalign-2.c) because (as mentioned in comments for the preceding patch) __asan_loadN/__asan_storeN are disabled for userspace. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-05-30 Jakub Jelinek ja...@redhat.com * asan.c (report_error_func): Add SLOW_P argument, use BUILT_IN_ASAN_*_N if set. (build_check_stmt): Likewise. (instrument_derefs): If T has insufficient alignment, force same handling as for odd sizes. Ok. Jakub
Re: [PATCH 4/17] Outline instrumentation
On Thu, Oct 16, 2014 at 12:37:37PM +0400, Yury Gribov wrote: 4.9's Asan runtime library provides no support for these so I removed the tests. I've also changed default value of threshold to INT_MAX to completely disable outline instrumentation in userspace Asan. New asan-instrumentation-with-call-threshold parameter. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-16 Yury Gribov y.gri...@samsung.com * asan.c (check_func): New function. (maybe_create_ssa_name): Likewise. (build_check_stmt_with_calls): Likewise. (use_calls_p): Likewise. (report_error_func): Change interface. (build_check_stmt): Allow non-integer lengths; add support for new parameter. (asan_instrument): Likewise. (instrument_mem_region_access): Moved code to build_check_stmt. (instrument_derefs): Likewise. (instrument_strlen_call): Likewise. * cfgcleanup.c (old_insns_match_p): Add support for new functions. * doc/invoke.texi: Describe new parameter. * params.def: Define new parameter. * params.h: Likewise. * sanitizer.def: Describe new builtins. * c-c++-common/asan/instrument-with-calls-1.c: New test. * c-c++-common/asan/instrument-with-calls-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-1.c: Update test patterns. * c-c++-common/asan/no-redundant-instrumentation-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-4.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-5.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-6.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-7.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-8.c: Likewise. Ok. Jakub
Re: [PATCH 5/17] Fix bootstrap error
On Thu, Oct 16, 2014 at 12:38:00PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-16 Yury Gribov y.gri...@samsung.com * asan.c (build_check_stmt): Fix maybe-uninitialized warning. Ok. Jakub
Re: [PATCH 6/17] Fix for PR 61530
On Thu, Oct 16, 2014 at 12:38:19PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-18 Yury Gribov y.gri...@samsung.com PR sanitizer/61530 * asan.c (build_check_stmt): Add condition. * c-c++-common/asan/pr61530.c: New test. Ok. Note, for the patches that fix bugs in earlier patches, please commit them in one svn revision together with the patch that caused the bugs, so that 4.9 isn't broken unnecessarily. Jakub
Re: [PATCH 7/17] Fix for PR 61547
On Thu, Oct 16, 2014 at 12:38:51PM +0400, Yury Gribov wrote: Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-18 Yury Gribov y.gri...@samsung.com PR sanitizer/61547 * asan.c (instrument_strlen_call): Fixed instrumentation of trailing byte. * c-c++-common/asan/strlen-overflow-1.c: New test. Ok. Jakub
Re: [PATCH 8/17] Optimization of strlen instrumentation
On Thu, Oct 16, 2014 at 12:39:17PM +0400, Yury Gribov wrote: Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). Where? I don't see unsigned char being used anywhere in the patch. bool is certainly fine in GCC itself. Do not instrument first byte in strlen if already instrumented. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-06-24 Max Ostapenko m.ostape...@partner.samsung.com * asan.c (instrument_strlen_call): Do not instrument first byte in strlen if already instrumented. * c-c++-common/asan/no-redundant-instrumentation-9.c: New test. Ok. Jakub
Re: [PATCH 9/17] Initial KAsan support
On Thu, Oct 16, 2014 at 12:39:56PM +0400, Yury Gribov wrote: The patch was slightly updated to take care of missing UBSan work (SANITIZE_FLOAT_DIVIDE, SANITIZE_FLOAT_CAST, SANITIZE_BOUNDS). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-07-31 Yury Gribov y.gri...@samsung.com * doc/cpp.texi (__SANITIZE_ADDRESS__): Updated description. * doc/invoke.texi (-fsanitize=kernel-address): Describe new option. * flag-types.h (SANITIZE_USER_ADDRESS, SANITIZE_KERNEL_ADDRESS): New enums. * gcc.c (sanitize_spec_function): Support new option. (SANITIZER_SPEC): Remove now redundant check. * opts.c (common_handle_option): Support new option. (finish_options): Check for incompatibilities. * toplev.c (process_options): Split userspace-specific checks. Ok. Jakub
Re: [PATCH 12/17] Fix off-by-one
On Thu, Oct 16, 2014 at 12:41:03PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-12 Yury Gribov y.gri...@samsung.com * internal-fn.c (init_internal_fns): Fix off-by-one. Ok. Jakub
Re: [PATCH 10/17] Support fnspec for internal fns
On Thu, Oct 16, 2014 at 12:40:17PM +0400, Yury Gribov wrote: The patch was slightly updated to take care of missing UBSan work (UBSAN_BOUNDS). Added fnspec to internal functions. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-11 Yury Gribov y.gri...@samsung.com * gimple.c (gimple_call_fnspec): Support internal functions. (gimple_call_return_flags): Use const. * Makefile.in (GTFILES): Add internal-fn.h to list of GC files. * internal-fn.def: Add fnspec information. * internal-fn.h (internal_fn_fnspec): New function. (init_internal_fns): Declare new function. * internal-fn.c (internal_fn_fnspec_array): New global variable. (init_internal_fns): New function. * tree-core.h: Update macro call. * tree.c (build_common_builtin_nodes): Initialize internal fns. Ok. Jakub
Re: [PATCH 11/17] Move Asan instrumentation to sanopt pass
On Thu, Oct 16, 2014 at 12:40:44PM +0400, Yury Gribov wrote: The patch was slightly updated to take care of missing UBSan work (UBSAN_BOUNDS). Move inlining of Asan memory checks to sanopt pass. Change asan-instrumentation-with-call-threshold to more closely match LLVM. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-11 Yury Gribov y.gri...@samsung.com * asan.c (asan_check_flags): New enum. (build_check_stmt_with_calls): Removed function. (build_check_stmt): Split inlining logic to asan_expand_check_ifn. (instrument_derefs): Rename parameter. (instrument_mem_region_access): Rename parameter. (instrument_strlen_call): Likewise. (asan_expand_check_ifn): New function. (asan_instrument): Remove old code. (pass_sanopt::execute): Change handling of asan-instrumentation-with-call-threshold. (asan_clear_shadow): Fix formatting. (asan_function_start): Likewise. (asan_emit_stack_protection): Likewise. * doc/invoke.texi (asan-instrumentation-with-call-threshold): Update description. * internal-fn.c (expand_ASAN_CHECK): New function. * internal-fn.def (ASAN_CHECK): New internal function. * params.def (PARAM_ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD): Update description. (PARAM_ASAN_USE_AFTER_RETURN): Likewise. * tree.c: Small comment fix. * c-c++-common/asan/inc.c: Update test. * c-c++-common/asan/instrument-with-calls-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-1.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-2.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-3.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-4.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-5.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-6.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-7.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-8.c: Likewise. * c-c++-common/asan/no-redundant-instrumentation-9.c: Likewise. Ok. Jakub
Re: [PATCH 14/17] Fix test on 32-bit platforms
On Thu, Oct 16, 2014 at 12:41:46PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-28 Yury Gribov y.gri...@samsung.com * c-c++-common/asan/pr62089.c: Fix test on 32-bit platforms. Ok. Jakub
Re: [PATCH 13/17] Fix for PR 62089
On Thu, Oct 16, 2014 at 12:41:24PM +0400, Yury Gribov wrote: Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-08-18 Yury Gribov y.gri...@samsung.com PR sanitizer/62089 * asan.c (instrument_derefs): Fix bitfield check. * c-c++-common/asan/pr62089.c: New test. * c-c++-common/asan/bitfield-1.c: New test. * c-c++-common/asan/bitfield-2.c: New test. * c-c++-common/asan/bitfield-3.c: New test. * c-c++-common/asan/bitfield-4.c: New test. Ok. Jakub
Re: [PATCH 16/17] Disable __asan_init calls for KASan
On Thu, Oct 16, 2014 at 12:42:28PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-10-03 Yury Gribov y.gri...@samsung.com * asan.c (asan_finish_file): Disable __asan_init calls for KASan; don't emit empty ctors. Ok. Jakub
Re: [PATCH 15/17] Fix for PR 61897 and PR 62140
On Thu, Oct 16, 2014 at 12:42:09PM +0400, Yury Gribov wrote: Same as mainline. 2014-10-15 Yury Gribov y.gri...@samsung.com Backport from mainline 2014-09-01 Yury Gribov y.gri...@samsung.com PR sanitizer/61897 PR sanitizer/62140 * asan.c (asan_mem_ref_get_end): Handle non-ptroff_t lengths. (build_check_stmt): Likewise. (instrument_strlen_call): Likewise. (asan_expand_check_ifn): Likewise and fix types. (maybe_cast_to_ptrmode): New function. * c-c++-common/asan/pr62140-1.c: New test. * c-c++-common/asan/pr62140-2.c: New test. Ok. Jakub
Re: [PATCH 17/17] Enable __asan_loadN/__asan_storeN for KASan
On Thu, Oct 16, 2014 at 12:43:25PM +0400, Yury Gribov wrote: Not a backport, specifically enables said instructions in KASan. Instrument unaligned objects in KASan. 2014-10-15 Yury Gribov y.gri...@samsung.com * asan.c (instrument_derefs): Enable unaligned path for KASan. diff --git a/gcc/asan.c b/gcc/asan.c index b8fbed3..db4e3a0 100644 --- a/gcc/asan.c +++ b/gcc/asan.c @@ -1705,8 +1705,11 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, } size_in_bytes = int_size_in_bytes (type); - if ((size_in_bytes (size_in_bytes - 1)) != 0 - || (unsigned HOST_WIDE_INT) size_in_bytes - 1 = 16) + if (size_in_bytes = 0) +return; + if ((flag_sanitize SANITIZE_USER_ADDRESS) != 0 + ((size_in_bytes (size_in_bytes - 1)) != 0 + || (unsigned HOST_WIDE_INT) size_in_bytes - 1 = 16)) return; HOST_WIDE_INT bitsize, bitpos; Ok. Jakub
Re: NRV with address taken
On Thu, 16 Oct 2014, Richard Biener wrote: Does this fix PR63537? PR63537 is already fine for me with trunk, NRV replaces ret with retval everywhere. It does so even if I add f(&ret); in the function with void f(vec*); I'd worry if both result and found are address taken before the pass, then trying to merge them together might mean something meant to have different addresses collapses into the same object. I'd not worry about that. But I think what the code tries to avoid is failing to adjust a use. But I can't think of a case that isn't handled if it properly replaces uses in address-taking operations (and asms). For example it fails to walk PHI nodes where var can appear as argument. Otherwise it relies on walk_gimple_op and walk_tree which should work. The other thing is aliasing though - if 'found' is TREE_ADDRESSABLE then points-to sets may contain 'found' but they are not adjusted to contain 'result' afterwards. Thus consider X a; X *p = &a; a.x = 1; p->x = ...; ... = a.x; return a; where after replacing 'a' with 'result' p->x will no longer alias the store that now looks like result.x and thus we'd happily CSE result.x across the pointer store. Now NRV runs quite late but we do preserve points-to information to RTL (and RTL expansion handles stack slot sharing fine with points-to sets - but we'd need to handle NRV the same here). Ah, ok. It would be great to paste some of this in tree-nrv.c, unless you think it will be too much. -- Marc Glisse
Re: [PATCH 8/17] Optimization of strlen instrumentation
On 10/16/2014 12:49 PM, Jakub Jelinek wrote: On Thu, Oct 16, 2014 at 12:39:17PM +0400, Yury Gribov wrote: Difference from mainline: replaced non-C-friendly bool with unsigned char (see patch 0001 for explanation). Where? I don't see unsigned char being used anywhere in the patch. bool is certainly fine in GCC itself. Probably slipped in from a neighboring patch :/ In summary, all patches that have #include <asan_interface.h> have this quirk, which I'll try to replace with #include <stdbool.h> per your suggestion. -Y
[committed] gnu11 fallout: ada
This fixes one test in the Ada testsuite (there may be more of these). Applying to trunk. 2014-10-16 Marek Polacek pola...@redhat.com * ada/acats/tests/cd/cd300051.c: Fix defaulting to int. diff --git gcc/testsuite/ada/acats/tests/cd/cd300051.c gcc/testsuite/ada/acats/tests/cd/cd300051.c index 5771fc8..86e60a0 100644 --- gcc/testsuite/ada/acats/tests/cd/cd300051.c +++ gcc/testsuite/ada/acats/tests/cd/cd300051.c @@ -48,7 +48,7 @@ -- CD30005.A (as comments). --! */ -int _cd30005_1( Value ) +int _cd30005_1(int Value ) { /* int Value */ Marek
[PATCH] Fix PR63554, move strncat_chk folding to GIMPLE
Another one (ok, I should really sit down and move all the remaining string related foldings). Bootstrap / regtest running on x86_64-unknown-linux-gnu. Richard. 2014-10-16 Richard Biener rguent...@suse.de PR middle-end/63554 * builtins.c (fold_builtin_4): Do not call fold_builtin_strncat_chk. (fold_builtin_strncat_chk): Move ... * gimple-fold.c (gimple_fold_builtin_strncat_chk): ... here. (gimple_fold_builtin): Call gimple_fold_builtin_strncat_chk. * gcc.dg/torture/pr63554.c: New testcase. Index: gcc/gimple-fold.c === --- gcc/gimple-fold.c (revision 216258) +++ gcc/gimple-fold.c (working copy) @@ -1632,6 +1632,62 @@ gimple_fold_builtin_strcat_chk (gimple_s return true; } +/* Fold a call to the __strncat_chk builtin with arguments DEST, SRC, + LEN, and SIZE. */ + +static bool +gimple_fold_builtin_strncat_chk (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + tree dest = gimple_call_arg (stmt, 0); + tree src = gimple_call_arg (stmt, 1); + tree len = gimple_call_arg (stmt, 2); + tree size = gimple_call_arg (stmt, 3); + tree fn; + const char *p; + + p = c_getstr (src); + /* If the SRC parameter is or if LEN is 0, return DEST. */ + if ((p *p == '\0') + || integer_zerop (len)) +{ + replace_call_with_value (gsi, dest); + return true; +} + + if (! tree_fits_uhwi_p (size)) +return false; + + if (! integer_all_onesp (size)) +{ + tree src_len = c_strlen (src, 1); + if (src_len + tree_fits_uhwi_p (src_len) + tree_fits_uhwi_p (len) + ! tree_int_cst_lt (len, src_len)) + { + /* If LEN = strlen (SRC), optimize into __strcat_chk. */ + fn = builtin_decl_explicit (BUILT_IN_STRCAT_CHK); + if (!fn) + return false; + + gimple repl = gimple_build_call (fn, 3, dest, src, size); + replace_call_with_call_and_fold (gsi, repl); + return true; + } + return false; +} + + /* If __builtin_strncat_chk is used, assume strncat is available. 
*/ + fn = builtin_decl_explicit (BUILT_IN_STRNCAT); + if (!fn) +return false; + + gimple repl = gimple_build_call (fn, 3, dest, src, len); + replace_call_with_call_and_fold (gsi, repl); + return true; +} + /* Fold a call to the fputs builtin. ARG0 and ARG1 are the arguments to the call. IGNORE is true if the value returned by the builtin will be ignored. UNLOCKED is true is true if this @@ -2457,6 +2513,8 @@ gimple_fold_builtin (gimple_stmt_iterato return gimple_fold_builtin_sprintf_chk (gsi, DECL_FUNCTION_CODE (callee)); case BUILT_IN_STRCAT_CHK: return gimple_fold_builtin_strcat_chk (gsi); +case BUILT_IN_STRNCAT_CHK: + return gimple_fold_builtin_strncat_chk (gsi); case BUILT_IN_STRLEN: return gimple_fold_builtin_strlen (gsi); case BUILT_IN_STRCPY: Index: gcc/builtins.c === --- gcc/builtins.c (revision 216258) +++ gcc/builtins.c (working copy) @@ -198,7 +198,6 @@ static void maybe_emit_chk_warning (tree static void maybe_emit_sprintf_chk_warning (tree, enum built_in_function); static void maybe_emit_free_warning (tree); static tree fold_builtin_object_size (tree, tree); -static tree fold_builtin_strncat_chk (location_t, tree, tree, tree, tree, tree); static tree fold_builtin_printf (location_t, tree, tree, tree, bool, enum built_in_function); static tree fold_builtin_fprintf (location_t, tree, tree, tree, tree, bool, enum built_in_function); @@ -10366,9 +10365,6 @@ fold_builtin_4 (location_t loc, tree fnd switch (fcode) { -case BUILT_IN_STRNCAT_CHK: - return fold_builtin_strncat_chk (loc, fndecl, arg0, arg1, arg2, arg3); - case BUILT_IN_FPRINTF_CHK: case BUILT_IN_VFPRINTF_CHK: if (!validate_arg (arg1, INTEGER_TYPE) @@ -11584,58 +11580,6 @@ fold_builtin_object_size (tree ptr, tree return NULL_TREE; } -/* Fold a call to the __strncat_chk builtin with arguments DEST, SRC, - LEN, and SIZE. 
*/ - -static tree -fold_builtin_strncat_chk (location_t loc, tree fndecl, - tree dest, tree src, tree len, tree size) -{ - tree fn; - const char *p; - - if (!validate_arg (dest, POINTER_TYPE) - || !validate_arg (src, POINTER_TYPE) - || !validate_arg (size, INTEGER_TYPE) - || !validate_arg (size, INTEGER_TYPE)) -return NULL_TREE; - - p = c_getstr (src); - /* If the SRC parameter is or if LEN is 0, return DEST. */ - if (p *p == '\0') -return omit_one_operand_loc (loc, TREE_TYPE (TREE_TYPE (fndecl)), dest, len); - else if (integer_zerop (len)) -return omit_one_operand_loc (loc, TREE_TYPE (TREE_TYPE (fndecl)), dest, src); - - if (! tree_fits_uhwi_p (size)) -return NULL_TREE; - - if (! integer_all_onesp (size)) -{ -
Re: [PATCH, i386]: Fix PR 59432, sync/atomic FAILs on 32bit x86 systems without .cfi directives
Uros Bizjak ubiz...@gmail.com writes: Hello! Now that %ebx is no more fixed, we can remove all PIC related complications in atomic_compare_and_swapdwi_doubleword pattern. The immediate consequence is, that we avoid hidden xchgs that clobbered unwinding state. Could also do the same in cpuid.h now -Andi -- a...@linux.intel.com -- Speaking for myself only
[PATCH, PR61605, 1/2] Handle copy cycles in pass_cprop_hardreg
Eric, this patch is the first half of the fix for PR61605. The problem it addresses is the following: Consider this copy cycle (a = b; b = a): ... (insn 2 18 3 2 (set (reg/v:SI 1 dx [orig:86 yD.1749 ] [86]) (reg:SI 5 di [ yD.1749 ])) test.c:9 90 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 5 di [ yD.1749 ]) (nil))) (note 3 2 6 2 NOTE_INSN_FUNCTION_BEG) (insn 6 3 7 2 (set (reg:SI 5 di) (reg/v:SI 1 dx [orig:86 yD.1749 ] [86])) test.c:10 90 {*movsi_internal} (nil)) ... cprop_hardreg handles this currently in the following way: - it processes the first copy, and sets up di as representant of dx. - it then processes the second copy, and replaces the dx with di: ... (insn 6 3 7 2 (set (reg:SI 5 di) (reg:SI 5 di [orig:86 yD.1749 ] [86])) test.c:10 90 {*movsi_internal} (nil)) ... turning it into a noop. pass_fast_rtl_dce subsequently removes the noop. However, while processing the second copy, it considers the set of di in insn 6 as killing, and removes di as representant of dx. So a use of dx in a following insn is not replaced by di. By running pass_cprop_hardreg once more after pass_fast_rtl_dce, we do manage to replace the use of dx in a following insn by di. This patch achieves the same, without rerunning pass_cprop_hardreg. It ensures in copyprop_hardreg_forward_1 that the set of a dest by a noop is not considered killing. Bootstrapped and reg-tested on x86_64. OK for trunk? Thanks, - Tom 2014-10-13 Tom de Vries t...@codesourcery.com PR rtl-optimization/61605 * regcprop.c (copyprop_hardreg_forward_1): Add copy_p and noop_p. Don't notice stores for noops. Don't regard noops as copies. diff --git a/gcc/regcprop.c b/gcc/regcprop.c index 3297721..c71de98 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -1032,12 +1032,21 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) note_stores (PATTERN (insn), kill_clobbered_value, vd); } - /* Notice stores. 
*/ - note_stores (PATTERN (insn), kill_set_value, ksvd); + bool copy_p = (set + REG_P (SET_DEST (set)) + REG_P (SET_SRC (set))); + bool noop_p = (copy_p + rtx_equal_p (SET_DEST (set), SET_SRC (set))); - /* Notice copies. */ - if (set REG_P (SET_DEST (set)) REG_P (SET_SRC (set))) - copy_value (SET_DEST (set), SET_SRC (set), vd); + if (!noop_p) + { + /* Notice stores. */ + note_stores (PATTERN (insn), kill_set_value, ksvd); + + /* Notice copies. */ + if (copy_p) + copy_value (SET_DEST (set), SET_SRC (set), vd); + } if (insn == BB_END (bb)) break; -- 1.9.1
Re: [PATCH, i386]: Fix PR 59432, sync/atomic FAILs on 32bit x86 systems without .cfi directives
On Thu, Oct 16, 2014 at 11:06 AM, Andi Kleen a...@firstfloor.org wrote: Now that %ebx is no more fixed, we can remove all PIC related complications in atomic_compare_and_swapdwi_doubleword pattern. The immediate consequence is, that we avoid hidden xchgs that clobbered unwinding state. Could also do the same in cpuid.h now I am just writing the patch submission ;) Uros.
[RFC PATCH, i386]: Remove special PIC related __cpuid definitions from config/i386/cpuid.h
Hello! Now that %ebx is also allocatable in PIC modes, we can cleanup config/i386/cpuid considerably. I propose to remove all PIC related specializations of __cpuid and __cpuid_count and protect the compilation with #if __GNUC__ = 5. The only drawback would be that non-bootstrapped build with gcc 5.0 will ignore -march=native, but I think this should be acceptable. Bootstrapped build will still work as expected. 2014-10-16 Uros Bizjak ubiz...@gmail.com * config/i386/cpuid.h (__cpuid): Remove definitions that handle %ebx register in a special way. (__cpuid_count): Ditto. * config/i386/driver-i386.h: Protect with #if __GNUC__ = 5. (host_detect_local_cpu): Mention that GCC that is able to handle %ebx register in PIC and non-PIC modes is required to compile the function. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Any comments? Uros. Index: config/i386/cpuid.h === --- config/i386/cpuid.h (revision 216282) +++ config/i386/cpuid.h (working copy) @@ -146,56 +146,7 @@ #define signature_VORTEX_ecx 0x436f5320 #define signature_VORTEX_edx 0x36387865 -#if defined(__i386__) defined(__PIC__) -/* %ebx may be the PIC register. */ -#if __GNUC__ = 3 #define __cpuid(level, a, b, c, d) \ - __asm__ (xchg{l}\t{%%}ebx, %k1\n\t \ - cpuid\n\t \ - xchg{l}\t{%%}ebx, %k1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level)) - -#define __cpuid_count(level, count, a, b, c, d)\ - __asm__ (xchg{l}\t{%%}ebx, %k1\n\t \ - cpuid\n\t \ - xchg{l}\t{%%}ebx, %k1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level), 2 (count)) -#else -/* Host GCCs older than 3.0 weren't supporting Intel asm syntax - nor alternatives in i386 code. 
*/ -#define __cpuid(level, a, b, c, d) \ - __asm__ (xchgl\t%%ebx, %k1\n\t \ - cpuid\n\t \ - xchgl\t%%ebx, %k1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level)) - -#define __cpuid_count(level, count, a, b, c, d)\ - __asm__ (xchgl\t%%ebx, %k1\n\t \ - cpuid\n\t \ - xchgl\t%%ebx, %k1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level), 2 (count)) -#endif -#elif defined(__x86_64__) (defined(__code_model_medium__) || defined(__code_model_large__)) defined(__PIC__) -/* %rbx may be the PIC register. */ -#define __cpuid(level, a, b, c, d) \ - __asm__ (xchg{q}\t{%%}rbx, %q1\n\t \ - cpuid\n\t \ - xchg{q}\t{%%}rbx, %q1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level)) - -#define __cpuid_count(level, count, a, b, c, d)\ - __asm__ (xchg{q}\t{%%}rbx, %q1\n\t \ - cpuid\n\t \ - xchg{q}\t{%%}rbx, %q1\n\t \ - : =a (a), =r (b), =c (c), =d (d)\ - : 0 (level), 2 (count)) -#else -#define __cpuid(level, a, b, c, d) \ __asm__ (cpuid\n\t \ : =a (a), =b (b), =c (c), =d (d) \ : 0 (level)) @@ -204,8 +155,8 @@ __asm__ (cpuid\n\t \ : =a (a), =b (b), =c (c), =d (d) \ : 0 (level), 2 (count)) -#endif + /* Return highest supported input value for cpuid instruction. ext can be either 0x0 or 0x800 to return highest supported value for basic or extended cpuid information. Function returns 0 if cpuid Index: config/i386/driver-i386.c === --- config/i386/driver-i386.c (revision 216282) +++ config/i386/driver-i386.c (working copy) @@ -24,7 +24,7 @@ along with GCC; see the file COPYING3. If not see const char *host_detect_local_cpu (int argc, const char **argv); -#ifdef __GNUC__ +#if __GNUC__ = 5 #include cpuid.h struct cache_desc @@ -942,9 +942,10 @@ done: } #else -/* If we aren't compiling with GCC then the driver will just ignore - -march and -mtune native target and will leave to the newly - built compiler to generate code for its default target. 
*/ +/* If we aren't compiling with GCC that is able to handle %EBX + register in PIC and non-PIC modes, then the driver will just + ignore -march and -mtune native target and will leave to the + newly built compiler to generate code for its default target. */ const char *host_detect_local_cpu (int, const char **) {
Re: [RFC PATCH, i386]: Remove special PIC related __cpuid definitions from config/i386/cpuid.h
On Thu, Oct 16, 2014 at 11:19:49AM +0200, Uros Bizjak wrote: Hello! Now that %ebx is also allocatable in PIC modes, we can cleanup config/i386/cpuid considerably. I propose to remove all PIC related specializations of __cpuid and __cpuid_count and protect the compilation with #if __GNUC__ >= 5. The only drawback would be that non-bootstrapped build with gcc < 5.0 will ignore -march=native, but I think this should be acceptable. I'm worried about that. Can't you instead keep the current cpuid.h stuff as is, just add __GNUC__ < 5 to that, so it treats GCC 5+ PIC as if __PIC__ wasn't defined? Or, at least use cpuid.h even for older GCC if __PIC__ is not defined (or __x86_64__ is defined and not medium/large PIC model)? Jakub
Re: NRV with address taken
On Thu, Oct 16, 2014 at 11:03 AM, Marc Glisse marc.gli...@inria.fr wrote: On Thu, 16 Oct 2014, Richard Biener wrote: Does this fix PR63537? PR63537 is already fine for me with trunk, NRV replaces ret with retval everywhere. It does so even if I add f(&ret); in the function with void f(vec*); I'd worry if both result and found are address taken before the pass, then trying to merge them together might mean something meant to have different addresses collapses into the same object. I'd not worry about that. But I think what the code tries to avoid is failing to adjust a use. But I can't think of a case that isn't handled if it properly replaces uses in address-taking operations (and asms). For example it fails to walk PHI nodes where var can appear as argument. Otherwise it relies on walk_gimple_op and walk_tree which should work. The other thing is aliasing though - if 'found' is TREE_ADDRESSABLE then points-to sets may contain 'found' but they are not adjusted to contain 'result' afterwards. Thus consider X a; X *p = &a; a.x = 1; p->x = ...; ... = a.x; return a; where after replacing 'a' with 'result' p->x will no longer alias the store that now looks like result.x and thus we'd happily CSE result.x across the pointer store. Now NRV runs quite late but we do preserve points-to information to RTL (and RTL expansion handles stack slot sharing fine with points-to sets - but we'd need to handle NRV the same here). Ah, ok. It would be great to paste some of this in tree-nrv.c, unless you think it will be too much. I think it would be great to integrate NRV with RTL expansion instead and thus handle the TREE_ADDRESSABLE case correctly. (simply merge stack-slots of retval and 'found'!?) Richard. -- Marc Glisse
Re: [Bug libstdc++/63500] [4.9/5 Regression] bug in debug version of std::make_move_iterator?
On 15/10/14 22:06 +0200, François Dumont wrote: On 15/10/2014 13:10, Jonathan Wakely wrote: I find this much easier to read: #if __cplusplus 201103L typedef _Is_contiguous_sequence_Sequence __tag; #else using __lvalref = std::is_lvalue_reference typename std::iterator_traits_InputIterator::reference; using __contiguous = _Is_contiguous_sequence_Sequence; using __tag = typename std::conditional__lvalref::value, __contiguous, std::__false_type::type; #endif return __foreign_iterator_aux3(__it, __other, __other_end, __tag()); It only has one preprocessor condition and it avoids mismatched parentheses caused by opening the function parameter list once but closing it twice in two different branches. That's much better indeed. Shall we go with this ? Of course we are simply considering that we can't check for foreign iterators when some iterator adapters comes in-between. I hope one day to detect invalid usages even in this context. 2014-10-16 François Dumont fdum...@gcc.gnu.org PR libstdc++/63500 * include/debug/functions.h (__foreign_iterator_aux2): Do not check for foreign iterators if input iterators returns rvalue reference. * testsuite/23_containers/vector/63500.cc: New. François As this is a regression it should go on the 4.9 branch too. 
Index: include/debug/functions.h === --- include/debug/functions.h (revision 216279) +++ include/debug/functions.h (working copy) @@ -34,7 +34,7 @@ // _Iter_base #include bits/cpp_type_traits.h // for __is_integer #include bits/move.h// for __addressof and addressof -# include bits/stl_function.h// for less +#include bits/stl_function.h // for less #if __cplusplus = 201103L # include type_traits // for is_lvalue_reference and __and_ #endif @@ -252,8 +252,16 @@ const _InputIterator __other, const _InputIterator __other_end) { - return __foreign_iterator_aux3(__it, __other, __other_end, -_Is_contiguous_sequence_Sequence()); +#if __cplusplus 201103L + typedef _Is_contiguous_sequence_Sequence __tag; +#else + using __lvalref = std::is_lvalue_reference + typename std::iterator_traits_InputIterator::reference; + using __contiguous = _Is_contiguous_sequence_Sequence; + using __tag = typename std::conditional__lvalref::value, __contiguous, + std::__false_type::type; +#endif + return __foreign_iterator_aux3(__it, __other, __other_end, __tag()); } /* Handle the case where we aren't really inserting a range after all */ Index: testsuite/23_containers/vector/63500.cc === --- testsuite/23_containers/vector/63500.cc (revision 0) +++ testsuite/23_containers/vector/63500.cc (working copy) @@ -0,0 +1,39 @@ +// -*- C++ -*- + +// Copyright (C) 2014 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// http://www.gnu.org/licenses/. + +// { dg-options -std=gnu++11 } +// { dg-do compile } + +#include memory +#include iterator +#include debug/vector + +class Foo +{}; + +void +test01() +{ + __gnu_debug::vectorstd::unique_ptrFoo v; + __gnu_debug::vectorstd::unique_ptrFoo w; + + v.insert(end(v), + make_move_iterator(begin(w)), + make_move_iterator(end(w))); +}
gnu11 fallout: ia64
Tested on ia64-suse-linux, installed as obvious. Andreas. * gcc.target/ia64/20090324-1.c: Fix implicit declarations and implicit int. * gcc.target/ia64/mfused-madd-vect.c: Likewise. * gcc.target/ia64/mno-fused-madd-vect.c: Likewise. * gcc.target/ia64/pr29682.c: Likewise. * gcc.target/ia64/pr43603.c: Likewise. * gcc.target/ia64/pr48496.c: Likewise. diff --git a/gcc/testsuite/gcc.target/ia64/20090324-1.c b/gcc/testsuite/gcc.target/ia64/20090324-1.c index d9aff6a..c44e809 100644 --- a/gcc/testsuite/gcc.target/ia64/20090324-1.c +++ b/gcc/testsuite/gcc.target/ia64/20090324-1.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options -O3 -fmodulo-sched } */ +void update_screen (char *); static char *place_region_bounds_x, *place_region_bounds_y; static void read_place () { char msg[300]; diff --git a/gcc/testsuite/gcc.target/ia64/mfused-madd-vect.c b/gcc/testsuite/gcc.target/ia64/mfused-madd-vect.c index e166e85..5bf6976 100644 --- a/gcc/testsuite/gcc.target/ia64/mfused-madd-vect.c +++ b/gcc/testsuite/gcc.target/ia64/mfused-madd-vect.c @@ -6,7 +6,7 @@ specific vector add/sub instructions. So we just check for fpmpy. */ #define N 16 -extern bar(float *, float *, float *, float *); +extern void bar(float *, float *, float *, float *); void foo() { int i; diff --git a/gcc/testsuite/gcc.target/ia64/mno-fused-madd-vect.c b/gcc/testsuite/gcc.target/ia64/mno-fused-madd-vect.c index fd80d06..10b047b 100644 --- a/gcc/testsuite/gcc.target/ia64/mno-fused-madd-vect.c +++ b/gcc/testsuite/gcc.target/ia64/mno-fused-madd-vect.c @@ -6,7 +6,7 @@ specific vector add/sub instructions. So we just check for fpmpy. 
*/ #define N 16 -extern bar(float *, float *, float *, float *); +extern void bar(float *, float *, float *, float *); void foo() { int i; diff --git a/gcc/testsuite/gcc.target/ia64/pr29682.c b/gcc/testsuite/gcc.target/ia64/pr29682.c index ecca323..5ffc789 100644 --- a/gcc/testsuite/gcc.target/ia64/pr29682.c +++ b/gcc/testsuite/gcc.target/ia64/pr29682.c @@ -12,7 +12,8 @@ typedef enum pgpArmor_e 5, PGPARMOR_PRIVKEY = 6, PGPARMOR_SECKEY = 7 } pgpArmor; -pgpCRC (const byte * octets, size_t len) +int b64decode (const char *, void **, size_t *); +unsigned int pgpCRC (const byte * octets, size_t len) { unsigned int crc = 0xb704ce; int i; @@ -26,7 +27,7 @@ pgpCRC (const byte * octets, size_t len) } } } -pgpReadPkts (const char *fn, const byte ** pkt, size_t * pktlen) +pgpArmor pgpReadPkts (const char *fn, const byte ** pkt, size_t * pktlen) { const byte *b = ((void *) 0); const char *enc = ((void *) 0); diff --git a/gcc/testsuite/gcc.target/ia64/pr43603.c b/gcc/testsuite/gcc.target/ia64/pr43603.c index ad3a5b1..92fc2c6 100644 --- a/gcc/testsuite/gcc.target/ia64/pr43603.c +++ b/gcc/testsuite/gcc.target/ia64/pr43603.c @@ -1,6 +1,8 @@ /* { dg-do compile } */ /* { dg-options -O3 } */ +int bar (int); +void car (long *, int *, int); int foo( long * np, int * dp, int qn) { diff --git a/gcc/testsuite/gcc.target/ia64/pr48496.c b/gcc/testsuite/gcc.target/ia64/pr48496.c index 6e60433..2548b0a 100644 --- a/gcc/testsuite/gcc.target/ia64/pr48496.c +++ b/gcc/testsuite/gcc.target/ia64/pr48496.c @@ -14,6 +14,7 @@ struct ia64_args UINT64 gp_regs[8]; }; +void ffi_call(long i, long gpcount, long fpcount, void **avalue) { struct ia64_args *stack; -- 2.1.2 -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 And now for something completely different.
gnu11 fallout: m68k
Tested on m68k-suse-linux and installed as obvious. Andreas. * gcc.target/m68k/crash1.c: Fix implicit declaration. diff --git a/gcc/testsuite/gcc.target/m68k/crash1.c b/gcc/testsuite/gcc.target/m68k/crash1.c index fdd737a..2554d62 100644 --- a/gcc/testsuite/gcc.target/m68k/crash1.c +++ b/gcc/testsuite/gcc.target/m68k/crash1.c @@ -15,6 +15,7 @@ struct kernel_stat unsigned irqs[256]; }; extern struct kernel_stat per_cpu__kstat; +void seq_printf (); void show_stat(void) { -- 2.1.2 -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 And now for something completely different.
Re: [Ping] Port of VTV for Cygwin and MinGW
On 09.10.2014 16:42, Kai Tietz wrote: Not approved: * gcc/cp/vtable-class-hierarchy.c Index: gcc/cp/vtable-class-hierarchy.c === --- gcc/cp/vtable-class-hierarchy.c(Revision 214408) +++ gcc/cp/vtable-class-hierarchy.c(Arbeitskopie) @@ -1182,7 +1182,7 @@ vtv_generate_init_routine (void) TREE_STATIC (vtv_fndecl) = 1; TREE_USED (vtv_fndecl) = 1; DECL_PRESERVE_P (vtv_fndecl) = 1; - if (flag_vtable_verify == VTV_PREINIT_PRIORITY) + if (flag_vtable_verify == VTV_PREINIT_PRIORITY !TARGET_PECOFF) You need to check that TARGET_PECOFF is defined. Otherwise you break compilation for none i386 targets. DECL_STATIC_CONSTRUCTOR (vtv_fndecl) = 0; gimplify_function_tree (vtv_fndecl); @@ -1190,7 +1190,7 @@ vtv_generate_init_routine (void) cgraph_process_new_functions (); - if (flag_vtable_verify == VTV_PREINIT_PRIORITY) + if (flag_vtable_verify == VTV_PREINIT_PRIORITY !TARGET_PECOFF) See above. Likewise assemble_vtv_preinit_initializer (vtv_fndecl); } * gcc/varasm.c Index: gcc/varasm.c === --- gcc/varasm.c(Revision 214408) +++ gcc/varasm.c(Arbeitskopie) @@ -2165,6 +2165,33 @@ assemble_variable (tree decl, int top_le DECL_NAME (decl)); in_section = sect; #else + /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here. + Therefore the following check is used. + In case a the target is PE or COFF a comdat group section + is created, e.g. .vtable_map_vars$foo. The linker places + everything in .vtable_map_vars at the end. + + A fix could be made in + gcc/config/i386/winnt.c: i386_pe_unique_section. */ + if (TARGET_PECOFF) You need to test, if TARGET_PECOFF is defined! 
+ { +char *name; + +if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE) + name = ACONCAT ((sect-named.name, $, + IDENTIFIER_POINTER (DECL_NAME (decl)), NULL)); +else + name = ACONCAT ((sect-named.name, $, + IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))), + NULL)); + +targetm.asm_out.named_section (name, + sect-named.common.flags + | SECTION_LINKONCE, Here it seems to me that you have some whitespace issues, + DECL_NAME (decl)); +in_section = sect; +} +else switch_to_section (sect); #endif This has been changed in the attached patch. * libgcc/Makefile.in Looks ok to me. * libgcc/config.host Looks fine to me, too. * libiberty/obstack.c Why you use instead of C-runtime exit/abort-functions the platform-functions to terminate the process. This looks to me like useless change. For cygwin this might be even wrong in some aspects. What is the reasoning for this change? I haven't encountered crashes in obstack.c itself, but there were problems in vtv_rts.cc with abort() on MinGW 32bit. The following stack traces were taken at MinGW 32bit. Most of the time the process had to be stopped in the process manager because a wrong process handle was passed to NtTerminateProcess. This was tested and occurred on Windows 7 64bit and Windows 8.1 64bit. To be sure to avoid this issue the calls have also been exchanged in obstack.c. With abort(), correct process handle, postmortem debugger triggert. 
0003 0028fe98 ntdll!NtTerminateProcess+0xc 0003 77e8f3b0 ntdll!RtlExitUserProcess+0x6d 0003 0028f934 74f85472 KERNEL32!ExitProcessImplementation+0x12 0003 11e9bfd9 00409000 msvcrt!exit+0x32 0003 0001 msvcrt!flushall+0x2e9 0003 00010001 0065 msvcrt!exit+0x11 6efcf294 0080 0028ffcc msvcrt!abort+0xf3 00560f70 000d 0001 libvtv_0!Z14__fortify_failPKc+0x18 7ffde000 0028ffdc 77568f8b test_std+0x13de 7ffde000 138a1dee KERNEL32!BaseThreadInitThunk+0xe 7755dad3 ntdll!__RtlUserThreadStart+0x20 004014e0 7ffde000 ntdll!_RtlUserThreadStart+0x1b With abort(), wrong process handle, NtTerminateProcess returns instead of ending the own process. This case happens most of the time. 0003 0028fe98 ntdll!NtTerminateProcess+0x5 0003 77e8f3b0 ntdll!RtlExitUserProcess+0x35 0003 0028f934 74f85472 KERNEL32!ExitProcessImplementation+0x12 0003 9f9f5ea3 00409000 msvcrt!exit+0x32 0003 0001 msvcrt!flushall+0x2e9 0003 00010001 0040 msvcrt!exit+0x11 6efcf294 0080 0028ffcc msvcrt!abort+0xf3 00701060 001e 0001 libvtv_0!Z14__fortify_failPKc+0x18 7ffde000 0028ffdc 77568f8b image0040+0x13de 7ffde000 9defd39c
Re: [PATCH i386 AVX512] [63.1/n] Add vpshufb, perm autogen (except for v64qi).
On 10 Oct 18:37, Uros Bizjak wrote: On Fri, Oct 10, 2014 at 5:47 PM, Ilya Tocar tocarip.in...@gmail.com wrote: Please recode that horrible first switch statement to: --cut here-- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; switch (mode) { case V8HImode: if (TARGET_AVX512VL TARGET_AVX152BW) gen = gen_avx512vl_vpermi2varv8hi3; break; ... case V2DFmode: if (TARGET_AVX512VL) { gen = gen_avx512vl_vpermi2varv2df3; maskmode = V2DImode; The patch is OK with the above improvement. Thanks, Uros. Will commit version below, if no objections in 24 hours. --- gcc/config/i386/i386.c | 292 ++--- gcc/config/i386/sse.md | 45 2 files changed, 255 insertions(+), 82 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index aedac19..e1228e3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -21411,35 +21411,132 @@ ix86_expand_int_vcond (rtx operands[]) return true; } +/* AVX512F does support 64-byte integer vector operations, + thus the longest vector we are faced with is V64QImode. */ +#define MAX_VECT_LEN 64 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_operand_p; + bool testing_p; +}; + static bool -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1) +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, + struct expand_vec_perm_d *d) { - enum machine_mode mode = GET_MODE (op0); + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + enum machine_mode mode = GET_MODE (d ? 
d-op0 : op0); + enum machine_mode maskmode = mode; + rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; + switch (mode) { +case V8HImode: + if (TARGET_AVX512VL TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv8hi3; + break; +case V16HImode: + if (TARGET_AVX512VL TARGET_AVX512BW) + gen = gen_avx512vl_vpermi2varv16hi3; + break; +case V32HImode: + if (TARGET_AVX512BW) + gen = gen_avx512bw_vpermi2varv32hi3; + break; +case V4SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4si3; + break; +case V8SImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv8si3; + break; case V16SImode: - emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0, - force_reg (V16SImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv16si3; + break; +case V4SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4sf3; + maskmode = V4SImode; + } + break; +case V8SFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv8sf3; + maskmode = V8SImode; + } + break; case V16SFmode: - emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0, - force_reg (V16SImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + { + gen = gen_avx512f_vpermi2varv16sf3; + maskmode = V16SImode; + } + break; +case V2DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv2di3; + break; +case V4DImode: + if (TARGET_AVX512VL) + gen = gen_avx512vl_vpermi2varv4di3; + break; case V8DImode: - emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0, - force_reg (V8DImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + gen = gen_avx512f_vpermi2varv8di3; + break; +case V2DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv2df3; + maskmode = V2DImode; + } + break; +case V4DFmode: + if (TARGET_AVX512VL) + { + gen = gen_avx512vl_vpermi2varv4df3; + maskmode = V4DImode; + } + break; case V8DFmode: - emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0, - force_reg (V8DImode, mask), - op1)); - return true; + if (TARGET_AVX512F) + { + gen = 
gen_avx512f_vpermi2varv8df3; + maskmode = V8DImode; + } + break; default: - return false; + break; } + + if (gen == NULL) +return false; + + /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const + expander, so args are either in d, or in op0, op1 etc. */ + if (d) +{ +
Re: [RFC PATCH, i386]: Remove special PIC related __cpuid definitions from config/i386/cpuid.h
On Thu, Oct 16, 2014 at 11:36 AM, Jakub Jelinek ja...@redhat.com wrote: Now that %ebx is also allocatable in PIC modes, we can cleanup config/i386/cpuid considerably. I propose to remove all PIC related specializations of __cpuid and __cpuid_count and protect the compilation with #if __GNUC__ >= 5. The only drawback would be that non-bootstrapped build with gcc < 5.0 will ignore -march=native, but I think this should be acceptable. I'm worried about that. Can't you instead keep the current cpuid.h stuff as is, just add __GNUC__ < 5 to that, so it treats GCC 5+ PIC as if __PIC__ wasn't defined? Or, at least use cpuid.h even for older GCC if __PIC__ is not defined (or __x86_64__ is defined and not medium/large PIC model)? Do we really care that much about non-bootstrapped build? I don't see At least on Linux, driver-i386.c should not be built with PIC normally, so at least changing #if __GNUC__ >= 5 to #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) would limit the -march=native change for non-bootstrapped compilers to Darwin only (or what other targets use PIC by default?). Yes, this would work for me - the goal is to keep only one universal __cpuid (and __cpuid_count) define, and the above condition fits this goal. Uros.
[SH][committed] PR 53513 Make FPU mode switches not use __fpscr_values
Hi, As discussed in the PR, the __fpscr_values based FPU mode switching is replaced by other means, which preserve the other FPU mode and status bits across mode switches. For details please refer to the coments in the PR. Tested on sh-sim with -m4 -ml and -m4 -mb without new failures, except for the expected interrupt function handling breakage. As discussed in the PR those not so common cases will be fixed later. Committed as r216307. Cheers, Oleg gcc/ChangeLog PR target/53513 * config/sh/sh-protos.h (emit_sf_insn, emit_df_insn, expand_sf_unop, expand_sf_binop, expand_df_unop, expand_df_binop): Remove. * config/sh/sh.c (sh_emit_set_t_insn): Adjust generated insn pattern to match fp insn patterns. (calc_live_regs): Add FPSCR_MODES_REG and FPSCR_STAT_REG to the ignore list. (emit_sf_insn, emit_df_insn, expand_sf_unop, expand_sf_binop, expand_df_unop, expand_df_binop): Remove. (sh_conditional_register_usage): Mark FPSCR_MODES_REG and FPSCR_STAT_REG as not call clobbered. (sh_emit_mode_set): Emit fpscr store-modify-load sequence instead of invoking fpscr_set_from_mem. * config/sh/sh.h (MAX_REGISTER_NAME_LENGTH): Increase to 6. (SH_REGISTER_NAMES_INITIALIZER): Add names for FPSCR_MODES_REG and FPSCR_STAT_REG. (REGISTER_NAMES): Adjust. (SPECIAL_REGISTER_P): Add FPSCR_MODES_REG and FPSCR_STAT_REG. (FIRST_PSEUDO_REGISTER): Increase to 156. (DWARF_FRAME_REGISTERS): Define as 153 to keep the original value. (FIXED_REGISTERS, CALL_USED_REGISTERS): Add FPSCR_MODES_REG and FPSCR_STAT_REG. (REG_CLASS_CONTENTS): Adjust ALL_REGS bit mask to include FPSCR_MODES_REG and FPSCR_STAT_REG. (REG_ALLOC_ORDER): Add FPSCR_MODES_REG and FPSCR_STAT_REG. * config/sh/sh.md (FPSCR_MODES_REG, FPSCR_STAT_REG, FPSCR_PR, FPSCR_SZ): Add new constants. (UNSPECV_FPSCR_MODES, UNSPECV_FPSCR_STAT): Add new unspecv constants. (movpsi): Use TARGET_FPU_ANY condition, invoke gen_fpu_switch. (fpu_switch): Add use and set of FPSCR_STAT_REG and FPSCR_MODES_REG. Use TARGET_FPU_ANY condition. 
(fpu_switch peephole2): Remove. (fpu_switch split): Use simple_mem_operand to capture the mem and adjust split implementation. (extend_psi_si, truncate_si_psi): New insns. (toggle_sz, toggle_pr): Use FPSCR_SZ, FPSCR_PR constants. Add set of FPSCR_MODES_REG. (push_e, push_4, pop_e, pop_4, movdf_i4, reload_indf__frn, movsf_ie, reload_insf__frn, force_mode_for_call, calli, calli_tbr_rel, calli_pcrel, call_pcrel, call_compact, call_compact_rettramp, call_valuei, call_valuei_tbr_rel, call_valuei_pcrel, call_value_pcrel, call_value_compact, call_value_compact_rettramp, call, call_pop_compact, call_pop_compact_rettramp, call_value, sibcalli, sibcalli_pcrel, sibcalli_thunk, sibcall_pcrel, sibcall_compact, sibcall, sibcall_valuei, sibcall_valuei_pcrel, sibcall_value_pcrel, sibcall_value_compact, sibcall_value, call_value_pop_compact, call_value_pop_compact_rettramp, various unnamed splits): Replace use of FPSCR_REG with use of FPSCR_MODES_REG. Adjust gen_* function uses. (floatsisf2_i4, *floatsisf2_ie): Merge into floatsisf2_i4. (fix_truncsfsi2_i4, *fixsfsi): Merge into fix_truncsfsi2_i4. (cmpgtsf_t, cmpgtsf_t_i4): Merge into cmpgtsf_t. (cmpeqsf_t, cmpeqsf_t_i4): Merge into cmpeqsf_t. (ieee_ccmpeqsf_t, *ieee_ccmpeqsf_t_4): Merge into ieee_ccmpeqsf_t. (udivsi3_i4, divsi3_i4, addsf3_i, subsf3_i, mulsf3_i, fmasf4_i, *fmasf4, divsf3_i, floatsisf2_i4, fix_truncsfsi2_i4, cmpgtsf_t, cmpeqsf_t, ieee_ccmpeqsf_t, sqrtsf2_i, rsqrtsf2, fsca, adddf3_i, subdf3_i, muldf3_i, divdf3_i, floatsidf2_i, fix_truncdfsi2_i, cmpgtdf_t, cmpeqdf_t, *ieee_ccmpeqdf_t, sqrtdf2_i, extendsfdf2_i4, truncdfsf2_i4): Replace use of FPSCR_REG with clobber of FPSCR_STAT_REG and use of FPSCR_MODES_REG. Adjust gen_* function uses. gcc/testsuite/ChangeLog PR target/53513 * gcc.target/sh/pr54680.c: Adjust matching of lds insn. 
Index: gcc/config/sh/sh.c === --- gcc/config/sh/sh.c (revision 216300) +++ gcc/config/sh/sh.c (working copy) @@ -2281,20 +2281,20 @@ return t ^ (cmpval == cmpop); } -/* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */ - +/* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case + of floating-point comparisons. */ static void sh_emit_set_t_insn (rtx insn, enum machine_mode mode) { - if ((TARGET_SH4 || TARGET_SH2A) GET_MODE_CLASS (mode) == MODE_FLOAT) + if (TARGET_FPU_ANY GET_MODE_CLASS (mode) == MODE_FLOAT + GET_CODE (insn) != PARALLEL) { insn
[PATCH][match-and-simplify] More ternary commutative ops, canonicalize operand order before generic_simplify
This patch (also applicable to trunk) makes us canoncialize operand order for comparisons at the same time we canonicalize other operand order, in particular before dispatching to generic_simplify. It also adds operand canonicalization to ternary ops and adds FMA_EXPR and DOT_PROD_EXPR to the list of ternary commutative ops. Bootstrap and regtest running on match-and-simplify branch and x86_64-unknown-linux-gnu. Richard. 2014-10-16 Richard Biener rguent...@suse.de * fold-const.c (fold_comparison): Remove redundant constant folding and operand swapping. (fold_binary_loc): Do comparison operand swapping here, dispatch to generic_simplify after operand canonicalization. (fold_ternary_loc): Canonicalize operand order for commutative ternary operations. * tree.c (commutative_ternary_tree_code): Add DOT_PROD_EXPR and FMA_EXPR. Index: gcc/fold-const.c === --- gcc/fold-const.c(revision 216262) +++ gcc/fold-const.c(working copy) @@ -8726,14 +8726,6 @@ fold_comparison (location_t loc, enum tr STRIP_SIGN_NOPS (arg0); STRIP_SIGN_NOPS (arg1); - tem = fold_relational_const (code, type, arg0, arg1); - if (tem != NULL_TREE) -return tem; - - /* If one arg is a real or integer constant, put it last. */ - if (tree_swap_operands_p (arg0, arg1, true)) -return fold_build2_loc (loc, swap_tree_comparison (code), type, op1, op0); - /* Transform comparisons of the form X +- C1 CMP C2 to X CMP C2 -+ C1. */ if ((TREE_CODE (arg0) == PLUS_EXPR || TREE_CODE (arg0) == MINUS_EXPR) (equality_code || TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg0))) @@ -9914,17 +9906,23 @@ fold_binary_loc (location_t loc, } } - extern tree generic_simplify (enum tree_code, tree, tree, tree); - tem = generic_simplify (code, type, op0, op1); - if (tem) -return tem; - /* If this is a commutative operation, and ARG0 is a constant, move it to ARG1 to reduce the number of tests below. 
*/ if (commutative_tree_code (code) tree_swap_operands_p (arg0, arg1, true)) return fold_build2_loc (loc, code, type, op1, op0); + /* Likewise if this is a comparison, and ARG0 is a constant, move it + to ARG1 to reduce the number of tests below. */ + if (kind == tcc_comparison + tree_swap_operands_p (arg0, arg1, true)) +return fold_build2_loc (loc, swap_tree_comparison (code), type, op1, op0); + + extern tree generic_simplify (enum tree_code, tree, tree, tree); + tem = generic_simplify (code, type, op0, op1); + if (tem) +return tem; + /* ARG0 is the first operand of EXPR, and ARG1 is the second operand. First check for cases where an arithmetic operation is applied to a @@ -13809,6 +13807,12 @@ fold_ternary_loc (location_t loc, enum t gcc_assert (IS_EXPR_CODE_CLASS (kind) TREE_CODE_LENGTH (code) == 3); + /* If this is a commutative operation, and OP0 is a constant, move it + to OP1 to reduce the number of tests below. */ + if (commutative_ternary_tree_code (code) + tree_swap_operands_p (op0, op1, true)) +return fold_build3_loc (loc, code, type, op1, op0, op2); + extern tree generic_simplify (enum tree_code, tree, tree, tree, tree); tem = generic_simplify (code, type, op0, op1, op2); if (tem) Index: gcc/tree.c === --- gcc/tree.c (revision 216244) +++ gcc/tree.c (working copy) @@ -7380,6 +7380,8 @@ commutative_ternary_tree_code (enum tree { case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: +case DOT_PROD_EXPR: +case FMA_EXPR: return true; default:
Re: [PATCH i386 AVX512] [63.1/n] Add vpshufb, perm autogen (except for v64qi).
On Thu, Oct 16, 2014 at 02:23:16PM +0400, Ilya Tocar wrote: On 10 Oct 18:37, Uros Bizjak wrote: On Fri, Oct 10, 2014 at 5:47 PM, Ilya Tocar tocarip.in...@gmail.com wrote: Please recode that horrible first switch statement to: --cut here-- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; switch (mode) { case V8HImode: if (TARGET_AVX512VL && TARGET_AVX512BW) gen = gen_avx512vl_vpermi2varv8hi3; break; ... case V2DFmode: if (TARGET_AVX512VL) { gen = gen_avx512vl_vpermi2varv2df3; maskmode = V2DImode; The patch is OK with the above improvement. Thanks, Uros. Will commit version below, if no objections in 24 hours. No need to wait, it is ok now (with proper ChangeLog of course). Jakub
Re: [PATCH 4/n] OpenMP 4.0 offloading infrastructure: lto-wrapper
On 15 Oct 16:38, Jakub Jelinek wrote: Done. But it turned out that the gcc_GAS_CHECK_FEATURE from gcc/configure.ac: gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e, [2,22,51],, [.section foo1,e .byte 0,0,0,0]) does not work properly. Maybe it works on cygwin* | pe | mingw32* | interix* targets, but on linux with GNU as version 2.20.51 (which doesn't support exclude sections) it successfully assembles conftest.s into conftest.o (with warnings) and HAVE_GAS_SECTION_EXCLUDE becomes defined. IMHO a version check is wrong (except when using in-tree gas). I'd suggest just to use [--fatal-warnings] as the 4th argument to gcc_GAS_CHECK_FEATURE feature, after all, that is what e.g. gcc_cv_as_shf_merge testing already uses. Fixed. Patch is updated and retested. Thanks, -- Ilya gcc/ * configure: Regenerate. * configure.ac: Move the test for section attribute specifier e in GAS out to all i[34567]86-*-* | x86_64-*-* targets and add --fatal-warnings. * gcc.c (spec_host_machine, accel_dir_suffix): New variables. (process_command): Tweak path construction for the possibility of being configured as an offload compiler. (driver::maybe_putenv_OFFLOAD_TARGETS): New function. (driver::main): Call maybe_putenv_OFFLOAD_TARGETS. (driver::set_up_specs): Tweak path construction for the possibility of being configured as an offload compiler. * langhooks.c (lhd_begin_section): Set SECTION_EXCLUDE flag. * lto-wrapper.c (OFFLOAD_TARGET_NAMES_ENV): Define. (offload_names, offloadbegin, offloadend): New static variables. (free_array_of_ptrs, parse_env_var, access_check, compile_offload_image) (compile_images_for_offload_targets, copy_file, find_offloadbeginend): New static functions. (run_gcc): Determine whether offload sections are present. If so, run compile_images_for_offload_targets and return the names of new generated objects to linker. If there are offload sections, but no LTO sections, then return the copies of input objects without link-time recompilation. 
* varasm.c (default_elf_asm_named_section): Guard SECTION_EXCLUDE with ifdef HAVE_GAS_SECTION_EXCLUDE. lto-plugin/ * lto-plugin.c (OFFLOAD_SECTION, OFFLOAD_SECTION_LEN): Define. (struct plugin_objfile): Add new field offload. (process_offload_section): New static function. (claim_file_handler): Claim file if it contains offload sections. --- diff --git a/gcc/configure b/gcc/configure index ff1e398..4ef208c 100755 --- a/gcc/configure +++ b/gcc/configure @@ -24722,9 +24722,12 @@ $as_echo $as_me: WARNING: LTO for $target requires binutils = 2.20.1, but vers ;; esac fi - # Test if the assembler supports the section flag 'e' for specifying - # an excluded section. - { $as_echo $as_me:${as_lineno-$LINENO}: checking assembler for .section with e 5 + ;; +esac + +# Test if the assembler supports the section flag 'e' for specifying +# an excluded section. +{ $as_echo $as_me:${as_lineno-$LINENO}: checking assembler for .section with e 5 $as_echo_n checking assembler for .section with e... 6; } if test ${gcc_cv_as_section_has_e+set} = set; then : $as_echo_n (cached) 6 @@ -24737,7 +24740,7 @@ fi elif test x$gcc_cv_as != x; then $as_echo '.section foo1,e .byte 0,0,0,0' conftest.s -if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s 5' +if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o conftest.s 5' { { eval echo \\$as_me\:${as_lineno-$LINENO}: \$ac_try\; } 5 (eval $ac_try) 25 ac_status=$? @@ -24760,8 +24763,6 @@ cat confdefs.h _ACEOF #define HAVE_GAS_SECTION_EXCLUDE `if test $gcc_cv_as_section_has_e = yes; then echo 1; else echo 0; fi` _ACEOF - ;; -esac { $as_echo $as_me:${as_lineno-$LINENO}: checking assembler for filds and fists mnemonics 5 $as_echo_n checking assembler for filds and fists mnemonics... 
6; } diff --git a/gcc/configure.ac b/gcc/configure.ac index 05a55f4..0f4bfc6 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3837,18 +3837,19 @@ foo:nop ;; esac fi - # Test if the assembler supports the section flag 'e' for specifying - # an excluded section. - gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e, - [2,22,51],, -[.section foo1,e -.byte 0,0,0,0]) - AC_DEFINE_UNQUOTED(HAVE_GAS_SECTION_EXCLUDE, - [`if test $gcc_cv_as_section_has_e = yes; then echo 1; else echo 0; fi`], - [Define if your assembler supports specifying the section flag e.]) ;; esac +# Test if the assembler supports the section flag 'e' for specifying +# an excluded section. +gcc_GAS_CHECK_FEATURE([.section with e],
Re: [PATCH i386 AVX512] [79/n] Extend expand_mul_widen_hilo.
On Thu, Oct 16, 2014 at 8:28 AM, Kirill Yukhin kirill.yuk...@gmail.com wrote: Hello, This patch extends expand_mul_widen_hilo to 512-bit QI,SI,HI modes. Bootstrapped and regtested gcc/ * config/i386/i386.c (ix86_expand_mul_widen_hilo): Handle V32HI, V16SI, V64QI modes. Is it ok for trunk? OK. Thanks, Uros.
Re: [PATCH i386 AVX512] [78/n] Use blend for inserting.
On Thu, Oct 16, 2014 at 9:28 AM, Jakub Jelinek ja...@redhat.com wrote: On Thu, Oct 16, 2014 at 10:24:45AM +0400, Kirill Yukhin wrote: Hello, This patch extends insertion hook. AVX-512* tests on top of patch-set all pass under simulator. gcc/ * config/i386/i386.c (ix86_expand_vector_set): Handle V8DF, V8DI, V16SF, V16SI, V32HI, V64QI modes. Just a ChangeLog comment style (seen in several entries you've committed and several posted patches). Please don't put a line break right after the filename if the (functionname): part fits nicely on the same line, the description can be wrapped anywhere as appropriate. In this case, * config/i386/i386.c (ix86_expand_vector_set): Handle V8DF, V8DI, V16SF, V16SI, V32HI, V64QI modes. is shorter and more readable. Other than that, this particular patch LGTM (unless we'd want for the 4 mostly repetitious cases add a common handling spot, which would need the gen fnpointer and kmode vars set before goto), but I'll leave it to Uros to ack it. Let's leave this as it is for now. OK. Thanks, Uros.
Re: update address taken: don't drop clobbers
On Wed, Oct 15, 2014 at 6:08 PM, Jeff Law l...@redhat.com wrote: On 10/15/14 08:35, Marc Glisse wrote: Would that extra pass be acceptable? Ugh, rather not. We have too many passes ;) Otherwise, what do you think should be responsible for cleaning up the dead assignments? Does anyone have an opinion on which side needs to be improved? As a reminder: - we have a va_list with its address taken by va_start/va_end. - fab lowers va_start/va_end and the list doesn't have its address taken anymore. - update_address_taken replaces the clobber: list =v {}; with an assignment of an undefined value: list_6 = list_2(D); - uninit warns about this. Some possible directions: - prematurely optimize in update_address_taken so we don't generate the useless assignment. - add a dce pass before uninit. I tend to land on the side of minimizing false positives, so the comment about PR18501 is a don't care to me. If the optimizers remove a dead assignment and we no longer warn about a potential uninitialized use in the dead assignment, then I consider that good. Not everyone agrees with that way of thinking, obviously. So my inclination would be to evaluate independent of the pr18501 issues. ie, what's the compile-time cost vs runtime benefit of running DCE here. I'm guessing there's little runtime benefit for this particular case. So my next line of thinking would be can we arrange to conditionally run DCE? ie, have update_address_taken signal that it did something that has a reasonable chance of exposing dead code and only run DCE in that case. Obviously this only helps if it rarely signals :-) I don't think we have any infrastructure for this right now. Finally I'd look at how difficult it would be to have update_address_taken cleanup after itself. If the LHS is in SSA form, then if we find it has no uses, can we just remove the assignment completely? It doesn't even know that it has no uses (the variable still needs to be written into SSA form). 
OTOH it is a missed DSE opportunity before update-address-taken? As of premature optimization - into-SSA could notice it created SSA names with no uses and trigger a fast DCE. Btw, I wonder what this odd folding of variadic builtins is about, and why it is not done in the stdarg pass (and only there), which would be earlier. Richard. jeff
Re: [PATCH 4/n] OpenMP 4.0 offloading infrastructure: lto-wrapper
On Thu, Oct 16, 2014 at 03:17:36PM +0400, Ilya Verbin wrote: On 15 Oct 16:38, Jakub Jelinek wrote: Done. But it turned out that the gcc_GAS_CHECK_FEATURE from gcc/configure.ac: gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e, [2,22,51],, [.section foo1,e .byte 0,0,0,0]) does not work properly. Maybe it works on cygwin* | pe | mingw32* | interix* targets, but on linux with GNU as version 2.20.51 (which doesn't support exclude sections) it successfully assembles conftest.s into conftest.o (with warnings) and HAVE_GAS_SECTION_EXCLUDE becomes defined. IMHO a version check is wrong (except when using in-tree gas). I'd suggest just to use [--fatal-warnings] as the 4th argument to gcc_GAS_CHECK_FEATURE feature, after all, that is what e.g. gcc_cv_as_shf_merge testing already uses. Fixed. Patch is updated and retested. Can you please extract the configure{,.ac}, langhooks.c and varasm.c bits into a separate patch? That is preapproved for trunk right now, that isn't dependent on anything else. The rest LGTM, but please run it through LTO review (Richard/Honza) too. Jakub
Re: [RFC] Compile-time gimple-checking (again)
On Wed, Oct 15, 2014 at 6:15 PM, David Malcolm dmalc...@redhat.com wrote: Back in March I posted an 89-patch kit to expand and make use of the gimple statement class hierarchy to move much of the type-checking of statement accessors to be at compile-time rather than run-time: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg01148.html I'd like to get these patches into trunk in some form before stage 1 closes. I'll attempt to summarize the earlier discussion about these patches; please forgive me if I'm mischaracterizing things. There was some discussion about what the resulting classes and API should look like. Jeff reviewed the patches and approved them (modulo some issues that I've fixed), conditional on resolving the API design issues that arose in the discussion - and on holding off until 4.9.1 was released. Richi wanted me to change gimple to be the base class, rather than being a typedef of a *pointer* to the base class: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg01520.html thus avoiding numerous typedefs for all of the subclasses, in their const and non-const variants. i.e. the pointerness of the type becomes explicit; everywhere we currently have a: gimple stmt; that's implicitly a ptr, we would instead have: gimple *stmt; making the pointer explicit. After some discussion about whether we wanted to keep the gimple_ prefix for the various subclasses , I posted an email with various ideas as to what the API could look like: * Status quo * The April 2014 patch series (with indirect use of is-a.h) * Direct use of is-a.h, retaining typedefs of pointers * Explicit pointers, rather than typedefs * Implicit naming * Namespaces (explicit) * Namespaces (implicit) * C++ references (without namespaces) * C++ references (with implicit namespaces) See https://gcc.gnu.org/ml/gcc-patches/2014-04/msg01562.html for the full examples. 
There was a followup discussion about whether we should convert the accessors to be *methods* rather than functions, but Richi felt that was a step too far at this time: https://gcc.gnu.org/ml/gcc-patches/2014-04/msg01824.html Jeff asked Richi (in https://gcc.gnu.org/ml/gcc-patches/2014-04/msg02082.html ): Anyway, gazillion new typedefs are ugly :/ (typedefs are ugly) Yea, can't argue with that. However, do we want to ask David to fix up the gimple vs gimple * vs const gimple * vs const gimple as a prerequisite for this patchset or are you comfortable going forward with the patchset, then researching if there's a cleaner way to handle the const/typedef issues? Richi responded (in https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00064.html): Well, I'd like to see both and one affects the other. Doing the const correctness thing first seems more natural to me. Of course both need to wait for 4.9.1. I posted a possible renaming of the gimple subclasses as: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00128.html incorporating a mass-renaming, and changing the pointerness of the gimple type to be explicit rather than implicit. After some discussion about what the types should be named Jakub suggested a simple g prefix: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00248.html giving a class hierarchy like this: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00346.html Jeff agreed: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00347.html Richi agreed: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00349.html (did I correctly summarize things?) Yes. I first attempted to implement this by doing the pointerness conversion first, to avoid lots of const_ typedefs, then updating the other patches. i.e. a big autogenerated patch, followed by 89 handwritten patches. I only managed about 1/3 of the kit, and what I had bitrotted very quickly. Based on that, I don't think it's workable to do the big automated gimple - gimple * conversion upfront. 
I think two workable approaches are: (i) save any big automated conversions until last (ii) do an automated conversion on the .patch files themselves I've updated the patch series; they can be seen at: https://dmalcolm.fedorapeople.org/gcc/patch-backups/gimple-classes/v11-patches/ This contains the patches from before. I've rebased them against r216157 from Monday (2014-10-13) aka fc222f445c6108418196a1b48703d350f3c3d45a. This required numerous essentially mechanical changes to the patches e.g. for the big reworking of cgraph functions to be methods. I've been working on the assumption that these various changes aren't going to require a re-review. I also removed the unloved as_a/dyn_cast methods from the gimple base class in favor of as_a/dyn_cast functions from is-a.h I've successfully bootstrapped and regtested the cumulative effect of the patchkit on Fedora 20 x86_64. Some questions: Are people still happy with the proposed naming from: https://gcc.gnu.org/ml/gcc-patches/2014-05/msg00346.html
Re: [PATCH] PR lto/61048 Write/read option -fsanitize to/from object files
On Wed, Oct 15, 2014 at 4:47 PM, Ilya Palachev i.palac...@samsung.com wrote: Hi all, The attached patch fixes PR lto/61048. The basic idea is to write option -fsanitize to existing ELF section .gnu.lto_.opts in object files and then read it in lto-wrapper. On 15.10.2014 12:46, Richard Biener wrote: You need to handle them in lto-opts.c and output them to the existing option section. 2 minor changes are added to existing function that write options (in lto-opts.c) and then read them from object files (lto-wrapper.c). The patch was bootstrapped and regtested on x86_64-unknown-linux-gnu. Ok for trunk? Ok. Note that for mismatched options you'll now get the first chosen. Which could be -fno-sanitize=address. Also if you build one file with -fsanitize=address and one with -fsanitize=undefined you'll get either but not both enabled. So the patch works for the simple cases but it will likely require more complex handling in lto-wrapper.c:merge_and_complain to do something sensible for mismatches (and not treat -fsanitize=address equal to -fsanitize=undefined). Btw, similar merging issues probably exist for -fsanitize-recover, -fsanitize-undefined-trap-on-error, or do they apply during instrumentation already? Thanks, Richard. Best regards, Ilya Palachev
Re: [RFC, PATCH]: Introduction of callgraph annotation class
I don't like generic annotation facilities at all. Would it be possible Why? to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes cgraph nodes are already kept dense via freelist. However in WPA you usually have a lot of different nodes prior merging and unreachable code removal and very few afterwards, the number of nodes grows again with inlining. Depending on what you want to store for values, I guess either vector or hashtable is good choice - if you want to keep data that needs to be duplicated per inline clone you can rely on density. If you want data on few function bodies, you will likely use hash... Honza with UID cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map int, T). Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 01:31 PM, Richard Biener wrote: On Wed, Oct 15, 2014 at 6:26 PM, Martin Liška mli...@suse.cz wrote: Hello. Following patch introduces a new class called callgraph_annotation. Idea behind the patch is to provide a generic interface one can use to register custom info related to a cgraph_node. As you know, symbol_table provides hooks for creation, deletion and duplication of a cgraph_node. If you have a pass, you need to handle all these hooks and store custom data in your data structure. As an example, after discussion with Martin, I chose usage in ipa-prop.h: data structure: vec<ipa_node_params> ipa_node_params_vector if the pass handles an event, following chunk is executed: if (ipa_node_params_vector.length () <= (unsigned) symtab->cgraph_max_uid) ipa_node_params_vector.safe_grow_cleared (symtab->cgraph_max_uid + 1); The problem is that you can have sparse UIDs of cgraph_nodes and every time you have to allocate a vector of size equal to cgraph_max_uid. As a replacement, I implemented first version of cgraph_annotation that internally uses hash_map<cgraph_unique_identifier, T>. Every time a node is deleted, we remove corresponding data associated to the node. What do you think about it? I don't like generic annotation facilities at all. Would it be possible to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes with UID < cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map <int, T>). Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as an index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. 
According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger than necessary. Martin Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On Thu, Oct 16, 2014 at 1:40 PM, Jan Hubicka hubi...@ucw.cz wrote: I don't like generic annotation facilities at all. Would it be possible Why? Because it's the way to hell if the IL has magic things only one pass can understand. It can't ever know if it may invalidate some of that data. Same reason why I dislike the ->aux pointers we have. (even if they are of course convenient) to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes cgraph nodes are already kept dense via freelist. However in WPA you usually have a lot of different nodes prior merging and unreachable code removal and very few afterwards, the number of nodes grows again with inlining. Depending on what you want to store for values, I guess either vector or hashtable is good choice - if you want to keep data that needs to be duplicated per inline clone you can rely on density. If you want data on few function bodies, you will likely use hash... Honza with UID cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map int, T). Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On Thu, Oct 16, 2014 at 1:42 PM, Martin Liška mli...@suse.cz wrote: On 10/16/2014 01:31 PM, Richard Biener wrote: On Wed, Oct 15, 2014 at 6:26 PM, Martin Liška mli...@suse.cz wrote: Hello. Following patch introduces a new class called callgraph_annotation. Idea behind the patch is to provide a generic interface one can use to register custom info related to a cgraph_node. As you know, symbol_table provides hooks for creation, deletion and duplication of a cgraph_node. If you have a pass, you need to handle all these hooks and store custom data in your data structure. As an example, after discussion with Martin, I chose usage in ipa-prop.h: data structure: vecipa_node_params ipa_node_params_vector if the pass handles an event, following chunk is executed: if (ipa_node_params_vector.length () = (unsigned) symtab-cgraph_max_uid) ipa_node_params_vector.safe_grow_cleared (symtab-cgraph_max_uid + 1); The problem is that you can have sparse UIDs of cgraph_nodes and every time you have to allocate a vector of size equal to cgraph_max_uid. As a replacement, I implemented first version of cgraph_annotation that internally uses hash_mapcgraph_unique_identifier, T. Every time a node is deleted, we remove corresponding data associated to the node. What do you think about it? I don't like generic annotation facilities at all. Would it be possible to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes with UID cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map int, T). Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. 
We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger than necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? Richard. Martin Richard. Thank you, Martin
[committed] gnu11 fallout: graphite
This fixes fallout of graphite tests. Applying to trunk. 2014-10-16 Marek Polacek pola...@redhat.com * gcc.dg/graphite/id-1.c: Fix defaulting to int. * gcc.dg/graphite/id-2.c: Likewise. * gcc.dg/graphite/id-4.c: Likewise. * gcc.dg/graphite/id-8.c: Likewise. * gcc.dg/graphite/id-pr45230-1.c: Likewise. * gcc.dg/graphite/id-pr45230.c: Likewise. * gcc.dg/graphite/id-pr45231.c: Likewise. * gcc.dg/graphite/pr38073.c: Likewise. * gcc.dg/graphite/pr38125.c: Likewise. * gcc.dg/graphite/pr38409.c: Likewise. * gcc.dg/graphite/pr38413.c: Likewise. * gcc.dg/graphite/pr38510.c: Likewise. * gcc.dg/graphite/pr38786.c: Likewise. * gcc.dg/graphite/pr39260.c: Likewise. * gcc.dg/graphite/scop-19.c: Likewise. * gcc.dg/graphite/id-13.c: Fix implicit declarations. * gcc.dg/graphite/id-17.c: Likewise. * gcc.dg/graphite/id-23.c: Likewise. * gcc.dg/graphite/id-26.c: Likewise. * gcc.dg/graphite/id-pr43464-1.c: Likewise. * gcc.dg/graphite/pr37485.c: Likewise. * gcc.dg/graphite/pr38500.c: Likewise. * gcc.dg/graphite/pr42284.c: Likewise. * gcc.dg/graphite/pr42914.c: Likewise. * gcc.dg/graphite/pr46404-1.c: Likewise. * gcc.dg/graphite/pr60979.c: Likewise. * gcc.dg/graphite/id-pr43464.c: Fix implicit declarations and defaulting to int. 
diff --git gcc/testsuite/gcc.dg/graphite/id-1.c gcc/testsuite/gcc.dg/graphite/id-1.c index 70b477a..1085c4a 100644 --- gcc/testsuite/gcc.dg/graphite/id-1.c +++ gcc/testsuite/gcc.dg/graphite/id-1.c @@ -1,5 +1,6 @@ typedef int *lambda_vector; typedef lambda_vector *lambda_matrix; +void lambda_vector_add_mc (lambda_vector vec1, int const1, lambda_vector vec2, int const2, lambda_vector vec3, int size) @@ -8,6 +9,7 @@ lambda_vector_add_mc (lambda_vector vec1, int const1, for (i = 0; i size; i++) vec3[i] = const1 * vec1[i] + const2 * vec2[i]; } +void lambda_matrix_add_mc (lambda_matrix mat1, int const1, lambda_matrix mat2, int const2, lambda_matrix mat3, int m, int n) diff --git gcc/testsuite/gcc.dg/graphite/id-13.c gcc/testsuite/gcc.dg/graphite/id-13.c index e921cd4..0dfafb8 100644 --- gcc/testsuite/gcc.dg/graphite/id-13.c +++ gcc/testsuite/gcc.dg/graphite/id-13.c @@ -1,3 +1,5 @@ +void bar (int); + void foo (int N, int k, int *fb) { diff --git gcc/testsuite/gcc.dg/graphite/id-17.c gcc/testsuite/gcc.dg/graphite/id-17.c index c93c211..68a7bb5 100644 --- gcc/testsuite/gcc.dg/graphite/id-17.c +++ gcc/testsuite/gcc.dg/graphite/id-17.c @@ -11,6 +11,7 @@ typedef struct } ImageParameters; ImageParameters *img; +void error (const char *); void GenerateSequenceParameterSet(seq_parameter_set_rbsp_t *sps) { diff --git gcc/testsuite/gcc.dg/graphite/id-2.c gcc/testsuite/gcc.dg/graphite/id-2.c index c11f6a2..c416822 100644 --- gcc/testsuite/gcc.dg/graphite/id-2.c +++ gcc/testsuite/gcc.dg/graphite/id-2.c @@ -1,4 +1,5 @@ typedef _Complex float GFC_COMPLEX_4; +void matmul_c4 () { int x, n, count; diff --git gcc/testsuite/gcc.dg/graphite/id-23.c gcc/testsuite/gcc.dg/graphite/id-23.c index 0f8a1f3..3b62d91 100644 --- gcc/testsuite/gcc.dg/graphite/id-23.c +++ gcc/testsuite/gcc.dg/graphite/id-23.c @@ -1,3 +1,4 @@ +void SubMtx_blockDiagonalInfo (int **); double *ZV_entries (void); void SubMtx_fillRowZV (int irow) { diff --git gcc/testsuite/gcc.dg/graphite/id-26.c 
gcc/testsuite/gcc.dg/graphite/id-26.c index 507d159..e6f64e5 100644 --- gcc/testsuite/gcc.dg/graphite/id-26.c +++ gcc/testsuite/gcc.dg/graphite/id-26.c @@ -1,3 +1,4 @@ +int abs (int); int find_sad_16x16(int *intra_mode) { int current_intra_sad_2,best_intra_sad2; diff --git gcc/testsuite/gcc.dg/graphite/id-4.c gcc/testsuite/gcc.dg/graphite/id-4.c index 38f6738..71d4b17 100644 --- gcc/testsuite/gcc.dg/graphite/id-4.c +++ gcc/testsuite/gcc.dg/graphite/id-4.c @@ -1,4 +1,5 @@ -extern a[]; +extern int a[]; +void g () { int i, b; diff --git gcc/testsuite/gcc.dg/graphite/id-8.c gcc/testsuite/gcc.dg/graphite/id-8.c index 1a278c1..25405cf 100644 --- gcc/testsuite/gcc.dg/graphite/id-8.c +++ gcc/testsuite/gcc.dg/graphite/id-8.c @@ -1,4 +1,5 @@ int blah; +int foo() { int i; diff --git gcc/testsuite/gcc.dg/graphite/id-pr43464-1.c gcc/testsuite/gcc.dg/graphite/id-pr43464-1.c index d5a8359..8e6ac20 100644 --- gcc/testsuite/gcc.dg/graphite/id-pr43464-1.c +++ gcc/testsuite/gcc.dg/graphite/id-pr43464-1.c @@ -3,6 +3,7 @@ typedef struct regnode char flags; } regnode; extern const unsigned char A[]; +int bar (regnode *); char *foo (regnode *c, char *s, int norun) { diff --git gcc/testsuite/gcc.dg/graphite/id-pr43464.c gcc/testsuite/gcc.dg/graphite/id-pr43464.c index a8e67dc..b9cf7c8 100644 --- gcc/testsuite/gcc.dg/graphite/id-pr43464.c +++ gcc/testsuite/gcc.dg/graphite/id-pr43464.c @@ -13,6 +13,9 @@ typedef struct
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On Thu, Oct 16, 2014 at 1:40 PM, Jan Hubicka hubi...@ucw.cz wrote: I don't like generic annotation facilities at all. Would it be possible Why? Because it's the way to hell if the IL has magic things only one pass can understand. It can't ever know if it may invalidate some of that data. Well, this is mostly intended for maintaining WHOPR summaries where we already have infrastructure to keep them up to date just the APIs are somewhat unhandy (having to register hooks all the time) I also think we can put there sparse stuff that is part of IL but you do not want to allocate it for every symbol/decl or statement (like EH regions or not stuff in symbol table that can not be easily handled by non-multiple inheritance) Honza Same reason why I dislike the ->aux pointers we have. (even if they are of course convenient) to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes cgraph nodes are already kept dense via freelist. However in WPA you usually have a lot of different nodes prior merging and unreachable code removal and very few afterwards, the number of nodes grows again with inlining. Depending on what you want to store for values, I guess either vector or hashtable is good choice - if you want to keep data that needs to be duplicated per inline clone you can rely on density. If you want data on few function bodies, you will likely use hash... Honza with UID < cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map<int, T>). Richard. Thank you, Martin
[PATCH] Simple improvement for predicate computation in if-convert phase.
Hi All, Here is a simple enhancement for predicate computation in if-convert phase: We use notion of cd equivalence to get simpler predicate for join block, e.g. if join block has 2 predecessors with predicates p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of p1 & p2 | p1 & !p2. Bootstrap and regression testing did not show any new failures. Is it OK for trunk? gcc/ChangeLog 2014-10-16 Yuri Rumyantsev ysrum...@gmail.com * tree-if-conv.c (add_to_predicate_list): Check unconditionally that bb is always executed to early exit. Use predicate of cd-equivalent block for join blocks if it exists. (if_convertible_loop_p_1): Recompute POST_DOMINATOR tree. (tree_if_conversion): Free post-dominance information. if-conv.patch Description: Binary data
[PATCH i386 AVX512] [80/n] Extend expand_sse2_mulvxdi3.
Hello, This patch extends expand_sse2_mulvxdi3. Bootstrapped. AVX-512* tests on top of patch-set all pass under simulator. Is it ok for trunk? gcc/ * config/i386/i386.c (ix86_expand_sse2_mulvxdi3): Extend expand_sse2_mulvxdi3. -- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1ee947a..945bc8d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -45667,7 +45667,19 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) enum machine_mode mode = GET_MODE (op0); rtx t1, t2, t3, t4, t5, t6; - if (TARGET_XOP && mode == V2DImode) + if (TARGET_AVX512DQ) +{ + if (mode == V8DImode) + emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); + else if (TARGET_AVX512VL) + { + if (mode == V4DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); + else if (mode == V2DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); + } +} + else if (TARGET_XOP && mode == V2DImode) { /* op1: A,B,C,D, op2: E,F,G,H */ op1 = gen_lowpart (V4SImode, op1);
[committed] gnu11 fallout: powerpc
Markus reported on IRC that gnu11 default broke these two tests on powerpc. Applying to trunk. 2014-10-16 Marek Polacek pola...@redhat.com * gcc.target/powerpc/pr58673-1.c: Fix defaulting to int. * gcc.target/powerpc/pr58673-2.c: Add declarations. diff --git gcc/testsuite/gcc.target/powerpc/pr58673-1.c gcc/testsuite/gcc.target/powerpc/pr58673-1.c index 6f7838f..3149c8c 100644 --- gcc/testsuite/gcc.target/powerpc/pr58673-1.c +++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c @@ -65,6 +65,7 @@ deduce_conversion (from, to) void bc_expand_binary_operation (optab, resulttype, arg0, arg1) + int resulttype, arg0, arg1; struct binary_operator optab[]; { int i, besti, cost, bestcost; diff --git gcc/testsuite/gcc.target/powerpc/pr58673-2.c gcc/testsuite/gcc.target/powerpc/pr58673-2.c index b70d2ee..ab84670 100644 --- gcc/testsuite/gcc.target/powerpc/pr58673-2.c +++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c @@ -90,6 +90,9 @@ typedef struct t_coupl_LJ *tcLJ; } t_coupl_rec; +void xvgr_legend (); +real calc_deviation (); +void pr_dev (); static void pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile, t_filenm fnm[]) Marek
Re: [RFC, PATCH]: Introduction of callgraph annotation class
Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as an index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger than necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? We could, if we have a way to update the summaries that are currently UID allocated. With annotation template we could have handle to do that more easily than diving into each of passes maintaining summaries by hand. On the other hand it still does not make the records quite dense in cases 1) you do not want to have separate records for clones because you know clones and master are identical 2) you care only about definitions ... At some point we discussed introducing separate UIDs for those but that was also not very welcome (and I agree we already have bit too many UIDs for functions - DECL_UID, node->uid, DECL_STRUCT_FUNCTION (node)->uid, profile_uid). I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite finish it. Honza Richard. Martin Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 02:01 PM, Jan Hubicka wrote: Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger thank necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? We could, if we have way to update the summaries that are currently UID allocated. With annotation template we could have handle to do that more easily than diving into each of passes maintaining summaries by hand. Agree with that, I will be central point one can implement these optimizations. One idea is to implement lazy allocation where we can allocate memory just in case someone calls annotation::get method. On the other hand it still does not make the records quite dense in cases 1) you do not want to have separate records for clones because you know clones and master are identical It would be quite easy to implement annotation::get_for_origin (int clone_id), where we find origin for the clone and return data associated to such origin node. 2) you care only about definitions Maybe similar stuff? Martin ... 
At some point we discussed introducing separate UIDs for those but that was also not very welcome (and I agree we already have bit too many UIDs for functions - DECL_UID, node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite finished it. Honza Richard. Martin Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 02:05 PM, Martin Liška wrote: On 10/16/2014 02:01 PM, Jan Hubicka wrote: Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger thank necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? We could, if we have way to update the summaries that are currently UID allocated. With annotation template we could have handle to do that more easily than diving into each of passes maintaining summaries by hand. Agree with that, I will be central point one can implement these optimizations. s/I/it One idea is to implement lazy allocation where we can allocate memory just in case someone calls annotation::get method. On the other hand it still does not make the records quite dense in cases 1) you do not want to have separate records for clones because you know clones and master are identical It would be quite easy to implement annotation::get_for_origin (int clone_id), where we find origin for the clone and return data associated to such origin node. 2) you care only about definitions Maybe similar stuff? Martin ... 
At some point we discussed introducing separate UIDs for those but that was also not very welcome (and I agree we already have bit too many UIDs for functions - DECL_UID, node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite finished it. Honza Richard. Martin Richard. Thank you, Martin
Re: Check that unlinked uses do not contain ssa-names when renaming.
On 16-10-14 10:14, Richard Biener wrote: On Thu, Oct 16, 2014 at 9:20 AM, Tom de Vries tom_devr...@mentor.com wrote: On 08/10/12 11:24, Richard Guenther wrote: On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote: Richard, attached patch checks that unlinked uses do not contain ssa-names when renaming. This assert triggers when compiling (without the fix) the PR54735 example. AFAIU, it was due to chance that we caught the PR54735 bug by hitting the verification failure, because the new vdef introduced by renaming happened to be the same name as the ssa name referenced in the invalid unlinked use (in terms of maybe_replace_use: rdef == use). The assert from this patch catches all cases that an unlinked use contains an ssa-name. Bootstrapped and reg-tested on x86_64 (Ada inclusive). OK for trunk? I don't think that is exactly what we should assert here ... (I thought about adding checking myself ...). What we'd want to assert is that before any new DEF is registered (which may re-allocate an SSA name) that no uses with SSA_NAME_IN_FREELIST appear. Thus, a light verification pass would be necessary at the beginning of update_ssa (which I queued onto my TODO list ...). We'd want that anyway to for example catch the case where a non-virtual operand is partially renamed. Richard, while developing a patch, I ran into the same 'no immediate_use list' verification error again, caused by an unlinked use containing an ssa-name. The verification error was caused by an error in my patch, but triggered by chance, by an unrelated change in the patch. I've tried to implement the 'light verification pass' you describe above, and I've checked that the error in my patch is found, also when I remove the trigger for the verification error from my patch. Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding removed, in order to ensure the code is active). OK for trunk? 
Ok with changing the gcc_assert to if (SSA_NAME_IN_FREE_LIST (use)) { error (statement uses released SSA name); debug_gimple_stmt (stmt); err = true; } and after checking all stmts if (err) internal_error (cannot update SSA form); you might want to push/pop TV_TREE_STMT_VERIFY around all this as well. Richard, I've implemented the changes listed above, and also made the message a bit more verbose: ... kernels-2.c: In function ‘main’: kernels-2.c:41:5: error: statement uses released SSA name for (COUNTERTYPE ii = 0; ii N; ii++) ^ # .MEM_57 = VDEF .MEM_79 .omp_data_arr.10 ={v} {CLOBBER}; The use of .MEM_79 should have been replaced or marked for renaming kernels-2.c:41:5: internal compiler error: cannot update SSA from ... I've added mentioning the specific use that has the problem, since it will not always be evident which is the one with the problem. OK for trunk? If that's too verbose I can also implement instead: ... kernels-2.c:41:5: error: statement uses released SSA name .MEM_79 ... Thanks, - Tom 2014-10-16 Tom de Vries t...@codesourcery.com * tree-into-ssa.c (update_ssa): Assert that there's no ssa use operand with SSA_NAME_IN_FREELIST. 
diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index 01203de..dcfba3c 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c @@ -3161,6 +3161,47 @@ update_ssa (unsigned update_flags) if (!need_ssa_update_p (cfun)) return; +#ifdef ENABLE_CHECKING + timevar_push (TV_TREE_STMT_VERIFY); + + bool err = false; + + FOR_EACH_BB_FN (bb, cfun) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (gsi)) + { + gimple stmt = gsi_stmt (gsi); + + ssa_op_iter i; + use_operand_p use_p; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_ALL_USES) + { + tree use = USE_FROM_PTR (use_p); + if (TREE_CODE (use) != SSA_NAME) + continue; + + if (SSA_NAME_IN_FREE_LIST (use)) + { + error (statement uses released SSA name:); + debug_gimple_stmt (stmt); + fprintf (stderr, The use of ); + print_generic_expr (stderr, use, 0); + fprintf (stderr, + should have been replaced or marked for renaming + \n); + err = true; + } + } + } +} + + if (err) +internal_error (cannot update SSA form); + + timevar_pop (TV_TREE_STMT_VERIFY); +#endif + timevar_push (TV_TREE_SSA_INCREMENTAL); if (dump_file (dump_flags TDF_DETAILS)) -- 1.9.1
Re: Check that unlinked uses do not contain ssa-names when renaming.
On Thu, Oct 16, 2014 at 2:11 PM, Tom de Vries tom_devr...@mentor.com wrote: On 16-10-14 10:14, Richard Biener wrote: On Thu, Oct 16, 2014 at 9:20 AM, Tom de Vries tom_devr...@mentor.com wrote: On 08/10/12 11:24, Richard Guenther wrote: On Sun, Oct 7, 2012 at 12:44 PM, Tom de Vries tom_devr...@mentor.com wrote: Richard, attached patch checks that unlinked uses do not contain ssa-names when renaming. This assert triggers when compiling (without the fix) the PR54735 example. AFAIU, it was due to chance that we caught the PR54735 bug by hitting the verification failure, because the new vdef introduced by renaming happened to be the same name as the ssa name referenced in the invalid unlinked use (in terms of maybe_replace_use: rdef == use). The assert from this patch catches all cases that an unlinked use contains an ssa-name. Bootstrapped and reg-tested on x86_64 (Ada inclusive). OK for trunk? I don't think that is exactly what we should assert here ... (I thought about adding checking myself ...). What we'd want to assert is that before any new DEF is registered (which may re-allocate an SSA name) that no uses with SSA_NAME_IN_FREELIST appear. Thus, a light verification pass would be necessary at the beginning of update_ssa (which I queued onto my TODO list ...). We'd want that anyway to for example catch the case where a non-virtual operand is partially renamed. Richard, while developing a patch, I ran into the same 'no immediate_use list' verification error again, caused by an unlinked use containing an ssa-name. The verification error was caused by an error in my patch, but triggered by chance, by an unrelated change in the patch. I've tried to implement the 'light verification pass' you describe above, and I've checked that the error in my patch is found, also when I remove the trigger for the verification error from my patch. Bootstrapped and reg-tested on x86_64 (with the ENABLE_CHECKING guarding removed, in order to ensure the code is active). OK for trunk? 
Ok with changing the gcc_assert to if (SSA_NAME_IN_FREE_LIST (use)) { error (statement uses released SSA name); debug_gimple_stmt (stmt); err = true; } and after checking all stmts if (err) internal_error (cannot update SSA form); you might want to push/pop TV_TREE_STMT_VERIFY around all this as well. Richard, I've implemented the changes listed above, and also made the message a bit more verbose: ... kernels-2.c: In function ‘main’: kernels-2.c:41:5: error: statement uses released SSA name for (COUNTERTYPE ii = 0; ii N; ii++) ^ # .MEM_57 = VDEF .MEM_79 .omp_data_arr.10 ={v} {CLOBBER}; The use of .MEM_79 should have been replaced or marked for renaming ^^^ or marked for renaming is not correct, only replacing is kernels-2.c:41:5: internal compiler error: cannot update SSA from ... I've added mentioning the specific use that has the problem, since it will not always be evident which is the one with the problem. OK for trunk? Ok with ajdusting the message. Thanks RIchard. If that's too verbose I can also implement instead: ... kernels-2.c:41:5: error: statement uses released SSA name .MEM_79 ... Thanks, - Tom
[SH][committed] Make GBR call preserved by default
Hi, This is not entirely related to PR 59401, but the discussion came up there. The attached patch makes the GBR on SH call preserved by default. Tested by Kaz on sh4-linux, committed as r216314. Cheers, Oleg gcc/ChangeLog: PR target/59401 * config/sh/sh.h (CALL_REALLY_USED_REGISTERS): Expand macro and set GBR to 0. Index: gcc/config/sh/sh.h === --- gcc/config/sh/sh.h (revision 216313) +++ gcc/config/sh/sh.h (working copy) @@ -821,9 +821,42 @@ 1, 1, 1, 1, \ } -/* TARGET_CONDITIONAL_REGISTER_USAGE might want to make a register - call-used, yet fixed, like PIC_OFFSET_TABLE_REGNUM. */ -#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS +/* CALL_REALLY_USED_REGISTERS is used as a default setting, which is then + overridden by -fcall-saved-* and -fcall-used-* options and then by + TARGET_CONDITIONAL_REGISTER_USAGE. There we might want to make a + register call-used, yet fixed, like PIC_OFFSET_TABLE_REGNUM. */ +#define CALL_REALLY_USED_REGISTERS \ +{ \ +/* Regular registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. \ + Only the lower 32bits of R10-R14 are guaranteed to be preserved \ + across SH5 function calls. */ \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 1, \ +/* FP registers. */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/* Branch target registers. */ \ + 1, 1, 1, 1, 1, 0, 0, 0, \ +/* XD registers. */ \ + 1, 1, 1, 1, 1, 1, 0, 0, \ +/*gbr, ap, pr, t,mach, macl, fpul, fpscr, */ \ + 0, 1, 1, 1, 1, 1, 1, 1, \ +/*rap, sfp,fpscr0,fpscr1 */ \ + 1, 1, 0, 0, \ +} /* Only the lower 32-bits of R10-R14 are guaranteed to be preserved across SHcompact function calls. 
We can't tell whether a called
Re: [PATCH i386 AVX512] [80/n] Extend expand_sse2_mulvxdi3.
On Thu, Oct 16, 2014 at 1:55 PM, Kirill Yukhin kirill.yuk...@gmail.com wrote: Hello, This patch extends expand_sse2_mulvxdi3. Bootstrapped. AVX-512* tests on top of patch-set all pass under simulator. Is it ok for trunk? gcc/ * config/i386/i386.c (ix86_expand_sse2_mulvxdi3): Extend expand_sse2_mulvxdi3. -- Thanks, K diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1ee947a..945bc8d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -45667,7 +45667,19 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) enum machine_mode mode = GET_MODE (op0); rtx t1, t2, t3, t4, t5, t6; - if (TARGET_XOP mode == V2DImode) + if (TARGET_AVX512DQ) +{ + if (mode == V8DImode) + emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); + else if (TARGET_AVX512VL) + { + if (mode == V4DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); + else if (mode == V2DImode) + emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); Should this be v2di ? + } +} + else if (TARGET_XOP mode == V2DImode) { /* op1: A,B,C,D, op2: E,F,G,H */ op1 = gen_lowpart (V4SImode, op1); Please use function pointers in the added part. Thanks, Uros.
[SH][committed] Fix typo in PR 63260 testcase
Hi, I've committed the attached obvious patch as r216317. Cheers, Oleg gcc/testsuite/ChangeLog: PR target/63260 * gcc.target/sh/pr63260.c: Fix typo __builtin_fabs vs. __builtin_abs. Index: gcc/testsuite/gcc.target/sh/pr63260.c === --- gcc/testsuite/gcc.target/sh/pr63260.c (revision 216313) +++ gcc/testsuite/gcc.target/sh/pr63260.c (working copy) @@ -26,5 +26,5 @@ double test_3 (double x) { - return __builtin_abs (x); + return __builtin_fabs (x); }
[PATCH v2] Fix pr61848, linux kernel miscompile
(I've taken Alan's patch and added two new testcases.) This patch cures the linux kernel boot failure when compiled using trunk gcc. At its heart, the problem is caused by merge_decls merging from the old decl to the new decl, then copying back to the old decl and discarding the new. When Jan moved some fields to the symtab, copying back to the old decl was lost for those fields. Really, it would be best if merge_decls was rewritten to merge everything to the kept decl, but here I'm just doing that for fields accessed via decl_with_vis.symtab_node. Tested on powerpc64-unknown-linux-gnu. OK for trunk? Thanks. 2014-10-16 Alan Modra amo...@gmail.com gcc/c/ PR middle-end/61848 * c-decl.c (merge_decls): Don't merge section name or tls model to newdecl symtab node, instead merge to olddecl. Override existing olddecl section name. Set tls_model for all thread-local vars, not just OMP thread-private ones. Remove incorrect comment. gcc/cp/ PR middle-end/61848 * decl.c (merge_decls): Don't merge section name, comdat group or tls model to newdecl symtab node, instead merge to olddecl. Override existing olddecl section name. Set tls_model for all thread-local vars, not just OMP thread-private ones. Remove incorrect comment. 2014-10-16 Markus Trippelsdorf mar...@trippelsdorf.de PR middle-end/61848 g++.dg/torture/pr61848.C: New testcase. gcc.c-torture/compile/pr61848.c: New testcase. --- gcc/c/c-decl.c| 28 ++-- gcc/cp/decl.c | 63 +++ gcc/testsuite/g++.dg/torture/pr61848.C| 5 +++ gcc/testsuite/gcc.c-torture/compile/pr61848.c | 5 +++ 4 files changed, 61 insertions(+), 40 deletions(-) create mode 100644 gcc/testsuite/g++.dg/torture/pr61848.C create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr61848.c diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index d9941141041e..18388cb59252 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -2293,22 +2293,10 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) /* Merge the threadprivate attribute. 
*/ if (TREE_CODE (olddecl) == VAR_DECL C_DECL_THREADPRIVATE_P (olddecl)) -{ - set_decl_tls_model (newdecl, DECL_TLS_MODEL (olddecl)); - C_DECL_THREADPRIVATE_P (newdecl) = 1; -} +C_DECL_THREADPRIVATE_P (newdecl) = 1; if (CODE_CONTAINS_STRUCT (TREE_CODE (olddecl), TS_DECL_WITH_VIS)) { - /* Merge the section attribute. -We want to issue an error if the sections conflict but that -must be done later in decl_attributes since we are called -before attributes are assigned. */ - if ((DECL_EXTERNAL (olddecl) || TREE_PUBLIC (olddecl) || TREE_STATIC (olddecl)) - DECL_SECTION_NAME (newdecl) == NULL - DECL_SECTION_NAME (olddecl)) - set_decl_section_name (newdecl, DECL_SECTION_NAME (olddecl)); - /* Copy the assembler name. Currently, it can only be defined in the prototype. */ COPY_DECL_ASSEMBLER_NAME (olddecl, newdecl); @@ -2518,6 +2506,20 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) (char *) newdecl + sizeof (struct tree_decl_common), tree_code_size (TREE_CODE (olddecl)) - sizeof (struct tree_decl_common)); olddecl-decl_with_vis.symtab_node = snode; + + if ((DECL_EXTERNAL (olddecl) + || TREE_PUBLIC (olddecl) + || TREE_STATIC (olddecl)) + DECL_SECTION_NAME (newdecl) != NULL) + set_decl_section_name (olddecl, DECL_SECTION_NAME (newdecl)); + + /* This isn't quite correct for something like + int __thread x attribute ((tls_model (local-exec))); + extern int __thread x; +as we'll lose the local-exec model. 
*/ + if (TREE_CODE (olddecl) == VAR_DECL + DECL_THREAD_LOCAL_P (newdecl)) + set_decl_tls_model (olddecl, DECL_TLS_MODEL (newdecl)); break; } diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 3eba4dcd1d68..1b214ab40907 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1967,7 +1967,6 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) if (!DECL_LANG_SPECIFIC (newdecl)) retrofit_lang_decl (newdecl); - set_decl_tls_model (newdecl, DECL_TLS_MODEL (olddecl)); CP_DECL_THREADPRIVATE_P (newdecl) = 1; } } @@ -2030,15 +2029,6 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) } } - /* Merge the section attribute. -We want to issue an error if the sections conflict but that must be -done later in decl_attributes since we are called before attributes -are assigned. */ - if ((DECL_EXTERNAL (olddecl) || TREE_PUBLIC (olddecl) ||
IPA ICF fallout: i586 bootstrap failure fix
Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::list<ipa_icf::congruence_class*>::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for thunk? Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. (sem_item_optimizer::dump_cong_classes): Likewise. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index 4e73849..f7510b3 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -2093,7 +2093,7 @@ sem_item_optimizer::process_cong_reduction (void) if (dump_file) fprintf (dump_file, "Worklist has been filled with: %lu\n", - worklist.size ()); + (unsigned long) worklist.size ()); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Congruence class reduction\n"); @@ -2113,7 +2113,7 @@ sem_item_optimizer::dump_cong_classes (void) fprintf (dump_file, "Congruence classes: %u (unique hash values: %lu), with total: %u items\n", - m_classes_count, m_classes.elements(), m_items.length ()); + m_classes_count, (unsigned long) m_classes.elements(), m_items.length ()); /* Histogram calculation. */ unsigned int max_index = 0;
Re: IPA ICF fallout: i586 bootstrap failure fix
Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for thunk? OK, (I would consider this obvious) Honza Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. (sem_item_optimizer::dump_cong_classes): Likewise. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index 4e73849..f7510b3 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -2093,7 +2093,7 @@ sem_item_optimizer::process_cong_reduction (void) if (dump_file) fprintf (dump_file, Worklist has been filled with: %lu\n, - worklist.size ()); + (unsigned long) worklist.size ()); if (dump_file (dump_flags TDF_DETAILS)) fprintf (dump_file, Congruence class reduction\n); @@ -2113,7 +2113,7 @@ sem_item_optimizer::dump_cong_classes (void) fprintf (dump_file, Congruence classes: %u (unique hash values: %lu), with total: %u items\n, -m_classes_count, m_classes.elements(), m_items.length ()); +m_classes_count, (unsigned long) m_classes.elements(), m_items.length ()); /* Histogram calculation. */ unsigned int max_index = 0;
Re: IPA ICF fallout: i586 bootstrap failure fix
On Thu, Oct 16, 2014 at 02:41:36PM +0200, Martin Liška wrote: Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for thunk? Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. Too long line, please wrap. I'd write Cast to unsigned long. instead. (sem_item_optimizer::dump_cong_classes): Likewise. Ok. Jakub
Re: IPA ICF fallout: i586 bootstrap failure fix
On 10/16/2014 02:45 PM, Jakub Jelinek wrote: On Thu, Oct 16, 2014 at 02:41:36PM +0200, Martin Liška wrote: Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for trunk? Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. Too long line, please wrap. I'd write Cast to unsigned long. instead. Thank you for the note, applied as suggested. Martin (sem_item_optimizer::dump_cong_classes): Likewise. Ok. Jakub
[SH][committed] Remove function fldi_ok
Hi, The fldi_ok function in sh.c has been returning 'true' for a while. Thus it can as well be removed. Tested with 'make all-gcc', committed as r216320. Cheers, Oleg gcc/ChangeLog: * config/sh/sh-protos.h (fldi_ok): Remove. * config/sh/sh.c (fldi_ok): Likewise. (sh_secondary_reload): Don't use fldi_ok. * config/sh/constraints.md (G constraint, H constraint): Don't use fldi_ok. Index: gcc/config/sh/sh-protos.h === --- gcc/config/sh/sh-protos.h (revision 216316) +++ gcc/config/sh/sh-protos.h (working copy) @@ -191,7 +191,6 @@ extern void sh_expand_epilogue (bool); extern void sh_set_return_address (rtx, rtx); extern int initial_elimination_offset (int, int); -extern bool fldi_ok (void); extern bool sh_hard_regno_rename_ok (unsigned int, unsigned int); extern bool sh_cfun_interrupt_handler_p (void); extern bool sh_cfun_resbank_handler_p (void); Index: gcc/config/sh/constraints.md === --- gcc/config/sh/constraints.md (revision 216316) +++ gcc/config/sh/constraints.md (working copy) @@ -210,12 +210,12 @@ (define_constraint G Double constant 0. (and (match_code const_double) - (match_test fp_zero_operand (op) fldi_ok ( + (match_test fp_zero_operand (op (define_constraint H Double constant 1. (and (match_code const_double) - (match_test fp_one_operand (op) fldi_ok ( + (match_test fp_one_operand (op ;; Extra constraints (define_constraint Q Index: gcc/config/sh/sh.c === --- gcc/config/sh/sh.c (revision 216316) +++ gcc/config/sh/sh.c (working copy) @@ -9875,19 +9875,6 @@ return REAL_VALUES_EQUAL (r, dconst1); } -/* In general mode switching is used. If we are - compiling without -mfmovd, movsf_ie isn't taken into account for - mode switching. We could check in machine_dependent_reorg for - cases where we know we are in single precision mode, but there is - no interface to find that out during reload, so we must avoid - choosing an fldi alternative during reload and thus failing to - allocate a scratch register for the constant loading. 
*/ -bool -fldi_ok (void) -{ - return true; -} - /* Return the TLS type for TLS symbols. */ enum tls_model tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) @@ -13106,8 +13093,7 @@ if (REGCLASS_HAS_FP_REG (rclass) ! TARGET_SHMEDIA immediate_operand ((x), mode) - ! ((fp_zero_operand (x) || fp_one_operand (x)) - mode == SFmode fldi_ok ())) + ! ((fp_zero_operand (x) || fp_one_operand (x)) mode == SFmode)) switch (mode) { case SFmode:
[PATCH][match-and-simplify] Merge from trunk
Bootstrapped on x86_64-unknown-linux-gnu. Richard. 2014-10-16 Richard Biener rguent...@suse.de Merge from trunk r216235 through r216315.
[PATCH] Make infer_loop_bounds_from_ref handle MEM_REFs, fix PR63278
The following makes us infer loop bounds for loops like bb 3: # str_28 = PHI foo(2), str_10(4) ... str_10 = str_28 + 1; _4 = *str_10; if (_4 != 0) goto bb 4; else goto bb 8; bb 4: goto bb 3; or bb 3: # p_15 = PHI p_6(3), a(2) p_6 = p_15 + 1; *p_15 = 0; ... if (n.1_5 i_8) goto bb 3; else goto bb 4; Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Honza - is there a symtab way of querying whether DECL_SIZE of a decl is correct? I know to better exclude extern decls and commons, but for example C++ may have stronger rules. Thanks, Richard. 2014-10-16 Richard Biener rguent...@suse.de PR tree-optimization/63278 * tree-ssa-loop-niter.c: Include tree-dfa.h. (struct ilb_data): Add pointer to outermost reference. (idx_infer_loop_bounds): Handle plain MEM_REFs of STRING_CSTs and DECLs. (infer_loop_bounds_from_ref): Adjust. * gcc.dg/tree-ssa/loop-41.c: New testcase. * gcc.dg/tree-ssa/loop-42.c: Likewise. Index: gcc/tree-ssa-loop-niter.c === --- gcc/tree-ssa-loop-niter.c (revision 216258) +++ gcc/tree-ssa-loop-niter.c (working copy) @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. 
#include stringpool.h #include tree-ssanames.h #include wide-int-print.h +#include tree-dfa.h #define SWAP(X, Y) do { affine_iv *tmp = (X); (X) = (Y); (Y) = tmp; } while (0) @@ -2775,6 +2776,7 @@ struct ilb_data { struct loop *loop; gimple stmt; + tree ref; }; static bool @@ -2787,7 +2789,10 @@ idx_infer_loop_bounds (tree base, tree * struct loop *loop = data-loop; bool reliable = true; - if (TREE_CODE (base) != ARRAY_REF) + if (TREE_CODE (base) != ARRAY_REF + (TREE_CODE (base) != MEM_REF + || base != data-ref + || !integer_zerop (TREE_OPERAND (base, 1 return true; /* For arrays at the end of the structure, we are not guaranteed that they @@ -2816,8 +2821,46 @@ idx_infer_loop_bounds (tree base, tree * || chrec_contains_symbols_defined_in_loop (init, loop-num)) return true; - low = array_ref_low_bound (base); - high = array_ref_up_bound (base); + if (TREE_CODE (base) == MEM_REF) +{ + HOST_WIDE_INT offset; + tree decl; + if (TREE_CODE (init) != ADDR_EXPR) + return true; + decl = get_addr_base_and_unit_offset (TREE_OPERAND (init, 0), offset); + if (!decl + || offset != 0) + return true; + /* If this is a bare MEM_REF with a pointer IV that starts at + offset zero of an object with known size we can easily compute +an upper bound for the pointer IV. */ + if (TREE_CODE (decl) == STRING_CST) + { + low = size_zero_node; + high = size_int (TREE_STRING_LENGTH (decl)); + } + else if (DECL_P (decl) + ((TREE_STATIC (decl) !DECL_COMMON (decl)) + || auto_var_in_fn_p (decl, cfun-decl))) + { + low = size_zero_node; + if (TREE_CODE (DECL_SIZE_UNIT (decl)) != INTEGER_CST) + return true; + high = DECL_SIZE_UNIT (decl); + } + else + return true; + /* We only require an upper estimate for high. So only +if we can, subtract the size of the access. 
*/ + if (TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (base))) == INTEGER_CST) + high = size_binop (MINUS_EXPR, + high, TYPE_SIZE_UNIT (TREE_TYPE (base))); +} + else +{ + low = array_ref_low_bound (base); + high = array_ref_up_bound (base); +} /* The case of nonconstant bounds could be handled, but it would be complicated. */ @@ -2879,6 +2922,7 @@ infer_loop_bounds_from_ref (struct loop data.loop = loop; data.stmt = stmt; + data.ref = ref; for_each_index (ref, idx_infer_loop_bounds, data); } Index: gcc/testsuite/gcc.dg/tree-ssa/loop-42.c === --- gcc/testsuite/gcc.dg/tree-ssa/loop-42.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-42.c (working copy) @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options -O2 -fdump-tree-cunroll-details } */ + +extern void abort (void); +int a = -1; +int n = sizeof (int); +int main() +{ + char *p; + int i; + for (i = 0, p = (char *)a; i n; ++i) +*p++ = 0; + if (a != 0) +abort (); + return 0; +} + +/* { dg-final { scan-tree-dump loop with 4 iterations completely unrolled cunroll } } */ +/* { dg-final { cleanup-tree-dump cunroll } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/loop-41.c === --- gcc/testsuite/gcc.dg/tree-ssa/loop-41.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-41.c (working copy) @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options -O3 -fdump-tree-cunroll-details } */ + +extern void abort (void); + +static inline unsigned int