[PATCH, AArch64] Fix abitest for ilp32
Hi, Please find attached the patch that fixes abitest for ilp32. testfunc_ptr is a 32bit pointer in ILP32 but is being loaded as 64bit. Hence some of the func-ret testcases FAIL's for ILP32. Please review the patch and let us know if its okay? Regression tested on aarch64-elf. Thanks, Naveen gcc/testsuite 2015-01-15 Andrew Pinski apin...@cavium.com Naveen H.S naveen.hurugalaw...@caviumnetworks.com * gcc.target/aarch64/aapcs64/abitest.S (LABEL_TEST_FUNC_RETURN): Load testfunc_ptr as 32bit for ILP32 and 64bit for LP64.--- gcc/testsuite/ChangeLog 2015-01-14 10:00:59.524914610 +0530 +++ gcc/testsuite/ChangeLog 2015-01-14 10:21:20.928932740 +0530 @@ -1,3 +1,9 @@ +2015-01-15 Andrew Pinski apin...@cavium.com + Naveen H.S naveen.hurugalaw...@caviumnetworks.com + + * gcc.target/aarch64/aapcs64/abitest.S (LABEL_TEST_FUNC_RETURN): Load + testfunc_ptr as 32bit for ILP32 and 64bit for LP64. + 2015-01-13 David Malcolm dmalc...@redhat.com * jit.dg/harness.h (set_up_logging): Move string concatenation --- gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S 2015-01-14 09:37:46.368893934 +0530 +++ gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S 2015-01-14 10:13:08.456925431 +0530 @@ -2,6 +2,13 @@ .global myfunc .type dumpregs,%function .type myfunc,%function + +#ifdef __LP64__ +#define PTR_REG(n) x##n +#else +#define PTR_REG(n) w##n +#endif + dumpregs: myfunc: mov x16, sp @@ -48,7 +55,7 @@ myfunc: LABEL_TEST_FUNC_RETURN: adrp x9, testfunc_ptr add x9, x9, :lo12:testfunc_ptr - ldr x9, [x9, #0] + ldr PTR_REG(9), [x9, #0] blr x9// function return value test adrp x9, saved_return_address add x9, x9, :lo12:saved_return_address
Drop workaround for old binutils linker plugin bug
Hi, this workaround actually triggers bug in quite recent golds, so it seems to be good motivation to finally drop it. The bug is long fixed. Bootstrapped/regtested x86_64-linux, will commit it shortly. Honza * tree-profile.c (init_ic_make_global_vars): Drop workaround for bintuils bug 14342. (init_ic_make_global_vars): Likewise. (gimple_init_edge_profiler): Likewise. (gimple_gen_ic_func_profiler): Likewise. Index: tree-profile.c === --- tree-profile.c (revision 219571) +++ tree-profile.c (working copy) @@ -105,30 +105,15 @@ init_ic_make_global_vars (void) ptr_void = build_pointer_type (void_type_node); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. */ - if (flag_lto) -{ - ic_void_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (__gcov_indirect_call_callee_ltopriv), - ptr_void); - TREE_PUBLIC (ic_void_ptr_var) = 1; - DECL_COMMON (ic_void_ptr_var) = 1; - DECL_VISIBILITY (ic_void_ptr_var) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (ic_void_ptr_var) = true; -} - else -{ - ic_void_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier ( - (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? - __gcov_indirect_call_topn_callee : - __gcov_indirect_call_callee)), - ptr_void); - TREE_PUBLIC (ic_void_ptr_var) = 1; - DECL_EXTERNAL (ic_void_ptr_var) = 1; -} + ic_void_ptr_var += build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ( + (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? + __gcov_indirect_call_topn_callee : + __gcov_indirect_call_callee)), + ptr_void); + TREE_PUBLIC (ic_void_ptr_var) = 1; + DECL_EXTERNAL (ic_void_ptr_var) = 1; TREE_STATIC (ic_void_ptr_var) = 1; DECL_ARTIFICIAL (ic_void_ptr_var) = 1; DECL_INITIAL (ic_void_ptr_var) = NULL; @@ -138,30 +123,16 @@ init_ic_make_global_vars (void) varpool_node::finalize_decl (ic_void_ptr_var); gcov_type_ptr = build_pointer_type (get_gcov_type ()); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. 
*/ - if (flag_lto) -{ - ic_gcov_type_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (__gcov_indirect_call_counters_ltopriv), - gcov_type_ptr); - TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; - DECL_COMMON (ic_gcov_type_ptr_var) = 1; - DECL_VISIBILITY (ic_gcov_type_ptr_var) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (ic_gcov_type_ptr_var) = true; -} - else -{ - ic_gcov_type_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier ( - (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? - __gcov_indirect_call_topn_counters : - __gcov_indirect_call_counters)), - gcov_type_ptr); - TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; - DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; -} + + ic_gcov_type_ptr_var += build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ( + (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? + __gcov_indirect_call_topn_counters : + __gcov_indirect_call_counters)), + gcov_type_ptr); + TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; + DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; TREE_STATIC (ic_gcov_type_ptr_var) = 1; DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1; DECL_INITIAL (ic_gcov_type_ptr_var) = NULL; @@ -230,33 +201,18 @@ gimple_init_edge_profiler (void) init_ic_make_global_vars (); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. */ - if (flag_lto) -{ - /* void (*) (gcov_type, void *) */ - ic_profiler_fn_type - = build_function_type_list (void_type_node, - gcov_type_ptr, gcov_type_node, - ptr_void, ptr_void, - NULL_TREE); - tree_indirect_call_profiler_fn - = build_fn_decl (__gcov_indirect_call_profiler, -ic_profiler_fn_type); -} - else -{ - /* void (*) (gcov_type, void *) */ - ic_profiler_fn_type - = build_function_type_list (void_type_node, - gcov_type_node, - ptr_void, -
PR 64481 (bootstrap miscompare)
Hi, in December I conditionally disabled expensive sanity checking in inliner. This triggers bootstrap miscompare because caches are getting out of sync. This patch fixes the problem found by sanity check - the node growth cache was removed from use in badness calculation by Richard a while ago, but the cache itself remained while the updating logic was dropped. This of course leads to somewhat randomish results. The other problem fixed is that in some cases we forget to walk through aliases to get into the callee. Bootstrapped/regtested x86_64-linux, committed. PR ipa/64481 * ipa-inline-analysis.c (node_growth_cache): Remove. (initialize_growth_caches): Do not initialize it. (free_growth_caches): Do not free it. (do_estimate_growth): Rename to ... (estimate_growth): ... this one; drop growth cache code. (growth_likely_positive): Always go the heuristics way. * ipa-inline.c (can_inline_edge_p): Walk through aliases. (reset_edge_caches): Do not reset node growth. (heap_edge_removal_hook): Do not maintain cache. (inline_small_functions): Likewise; strengthen sanity check. (ipa_inline): Do not maintain caches. * ipa-inline.h (node_growth_cache): Remove. (do_estimate_growth): Remove to ... (estimate_growth): this one; remove inline version. (reset_node_growth_cache): Remove. Index: ipa-inline-analysis.c === --- ipa-inline-analysis.c (revision 219571) +++ ipa-inline-analysis.c (working copy) @@ -167,7 +167,6 @@ function_summary inline_summary * *inl vecinline_edge_summary_t inline_edge_summary_vec; /* Cached node/edge growths. */ -vecint node_growth_cache; vecedge_growth_cache_entry edge_growth_cache; /* Edge predicates goes here. 
*/ @@ -1341,8 +1340,6 @@ initialize_growth_caches (void) { if (symtab-edges_max_uid) edge_growth_cache.safe_grow_cleared (symtab-edges_max_uid); - if (symtab-cgraph_max_uid) -node_growth_cache.safe_grow_cleared (symtab-cgraph_max_uid); } @@ -1352,7 +1349,6 @@ void free_growth_caches (void) { edge_growth_cache.release (); - node_growth_cache.release (); } @@ -3931,7 +3927,7 @@ do_estimate_growth_1 (struct cgraph_node /* Estimate the growth caused by inlining NODE into all callees. */ int -do_estimate_growth (struct cgraph_node *node) +estimate_growth (struct cgraph_node *node) { struct growth_data d = { node, 0, false }; struct inline_summary *info = inline_summaries-get (node); @@ -3960,12 +3956,6 @@ do_estimate_growth (struct cgraph_node * + 50) / 100; } - if (node_growth_cache.exists ()) -{ - if ((int) node_growth_cache.length () = node-uid) - node_growth_cache.safe_grow_cleared (symtab-cgraph_max_uid); - node_growth_cache[node-uid] = d.growth + (d.growth = 0); -} return d.growth; } @@ -3979,7 +3969,6 @@ bool growth_likely_positive (struct cgraph_node *node, int edge_growth ATTRIBUTE_UNUSED) { int max_callers; - int ret; struct cgraph_edge *e; gcc_checking_assert (edge_growth 0); @@ -3999,10 +3988,6 @@ growth_likely_positive (struct cgraph_no || !node-can_remove_if_no_direct_calls_p ()) return true; - /* If there is cached value, just go ahead. 
*/ - if ((int)node_growth_cache.length () node-uid - (ret = node_growth_cache[node-uid])) -return ret 0; if (!node-will_be_removed_from_program_if_no_direct_calls_p () (!DECL_COMDAT (node-decl) || !node-can_remove_if_no_direct_calls_p ())) Index: ipa-inline.c === --- ipa-inline.c(revision 219571) +++ ipa-inline.c(working copy) @@ -388,11 +388,11 @@ can_inline_edge_p (struct cgraph_edge *e else if (caller_tree != callee_tree) { if (((opt_for_fn (e-caller-decl, optimize) -opt_for_fn (e-callee-decl, optimize)) +opt_for_fn (callee-decl, optimize)) || (opt_for_fn (e-caller-decl, optimize_size) - != opt_for_fn (e-callee-decl, optimize_size))) + != opt_for_fn (callee-decl, optimize_size))) /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */ - !DECL_DISREGARD_INLINE_LIMITS (e-callee-decl)) + !DECL_DISREGARD_INLINE_LIMITS (callee-decl)) { e-inline_failed = CIF_OPTIMIZATION_MISMATCH; inlinable = false; @@ -1095,9 +1095,6 @@ reset_edge_caches (struct cgraph_node *n if (where-global.inlined_to) where = where-global.inlined_to; - /* WHERE body size has changed, the cached growth is invalid. */ - reset_node_growth_cache (where); - for (edge = where-callers; edge; edge = edge-next_caller) if (edge-inline_failed) reset_edge_growth_cache (edge); @@ -1428,8 +1425,6 @@ add_new_edges_to_heap (edge_heap_t *heap static void
Re: [PATCH 0/6, nds32] Committed: Have -mcmodel option to support varied code model of programs.
2015-01-14 6:22 GMT+08:00 Joseph Myers jos...@codesourcery.com: On Tue, 13 Jan 2015, Chung-Ju Wu wrote: To fix this issue, we are going to use -mcmodel=X options, which probably gives more flexibility to support varied code model on code generation. The -mgp-direct option now becomes meaningless and can be discarded. If you add or remove command-line options, you need to update invoke.texi accordingly. Thanks for your reminder. I will propose another patch to update documentation as soon as possible. Best regards, jasonwucj -- Joseph S. Myers jos...@codesourcery.com
[PATCH] Fix PR c++/16160
This patch fixes the above PR where it was reported that the C++ frontend does not reject the malformed class declaration struct X5; Instead of rejecting it, the FE treats this declaration as if it were a forward declaration of a template specialization, i.e. as if it were written template struct X5; First off, the FE should reject the declaration because it is malformed (not 100% sure, though). Second, since the user probably intended to have written an explicit template instantiation (as in the PR), the FE should suggest adding template before such a declaration, that is the declaration struct X5; // error + suggest adding template This patch does both these things along with adding error messages + suggestions for struct X5 { }; // error + suggest adding template and template struct X5 { }; // error + suggest replacing with template Bootstrap and regtesting in progress. Does this patch look OK for trunk? gcc/cp/ChangeLog: PR c++/16160 * parser.c (cp_parser_class_head): Identify and reject malformed template-id declarations and definitions. --- gcc/cp/parser.c | 53 +++- gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C | 2 +- gcc/testsuite/g++.dg/ext/attrib9.C | 2 +- gcc/testsuite/g++.dg/template/crash54.C | 2 +- gcc/testsuite/g++.dg/template/error55.C | 11 +++ 5 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/error55.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 3290dfa..f6dc004 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -20264,6 +20264,34 @@ cp_parser_class_head (cp_parser* parser, } virt_specifiers = cp_parser_virt_specifier_seq_opt (parser); + /* Make sure a top-level template-id declaration or definition is preceded + by template or template . 
*/ + if (template_id_p + at_namespace_scope_p () + parser-num_template_parameter_lists == 0 + !processing_explicit_instantiation) +{ + if (cp_parser_next_token_starts_class_definition_p (parser)) + { + error_at (type_start_token-location, + an explicit specialization must be preceded by + %template %); + invalid_explicit_specialization_p = true; + /* Try to recover gracefully by taking the same action that would +have been taken by cp_parser_explicit_specialization. */ + ++parser-num_template_parameter_lists; + begin_specialization (); + } + else if (cp_parser_declares_only_class_p (parser)) + { + error_at (type_start_token-location, + an explicit instantiation must be preceded by + %template%); + type = error_mark_node; + goto out; + } +} + /* If it's not a `:' or a `{' then we can't really be looking at a class-head, since a class-head only appears as part of a class-specifier. We have to detect this situation before calling @@ -20275,6 +20303,16 @@ cp_parser_class_head (cp_parser* parser, goto out; } + if (processing_explicit_instantiation) +{ + error_at (type_start_token-location, + an explicit instantiation may not have a definition); + inform (type_start_token-location, + use %template % to define an explicit specialization); + type = error_mark_node; + goto out; +} + /* At this point, we're going ahead with the class-specifier, even if some other problem occurs. */ cp_parser_commit_to_tentative_parse (parser); @@ -20346,20 +20384,7 @@ cp_parser_class_head (cp_parser* parser, num_templates = 0; } } - /* An explicit-specialization must be preceded by template . If - it is not, try to recover gracefully. */ - if (at_namespace_scope_p () - parser-num_template_parameter_lists == 0 - template_id_p) -{ - error_at (type_start_token-location, - an explicit specialization must be preceded by %template %); - invalid_explicit_specialization_p = true; - /* Take the same action that would have been taken by -cp_parser_explicit_specialization. 
*/ - ++parser-num_template_parameter_lists; - begin_specialization (); -} + /* There must be no return statements between this point and the end of this function; set type to the correct return value and use goto done; to return. */ diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C index 3dc51ee..4957ba1 100644 --- a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C +++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C @@ -9,4 +9,4 @@ enum [[gnu::unused]] e; // { dg-warning already defined } struct [[gnu::unused]] B *p; // { dg-warning attributes } template class T struct A { }; -struct [[gnu::unused]] Aint; // { dg-warning attributes } +struct [[gnu::unused]]
Patch ping...
Hi, I would like to ping the patch to fix divergence between a type and its main variant introduced by C++ FE. https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01202.html Honza
Re: [PATCH/expand] PR64011 Adjust bitsize when partial overflow happen for big-endian
On 01/13/15 15:42, Joseph Myers wrote: On Tue, 13 Jan 2015, Jeff Law wrote: In many ways having the compiler or assembler spitting out an error here is preferable to silently compiling the code. That would also help explain why As usual, an error is incorrect in such a case that only has undefined behavior at runtime (but it may be compiled into an abort if the behavior is unconditionally undefined, and the abort doesn't replace anything before the undefined behavior that might have stopped the undefined behavior from occurring). You are, of course, correct. We can't error here, but we can generate a conditional warning. jeff
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 17:03, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 03:17:08PM -0700, Jeff Law wrote: And finally there is the case of non-volatile asm with memory clobber with no memory stores in between the two - the posted (safer) patch will not allow to CSE the two, while in theory we could CSE them into just one asm. I think we have to assume that CSEing them is wrong. The first may set something in memory that is read by the second. Thoughts? I agree with pretty much everything you say in the thread, except for this idea that a memory clobber reads memory. No clobber reads anything. The commit that introduced the memory clobber concept, 426b38c9 (svn 1207), by rms, has as only comment /* `memory', don't cache memory across asm */ RMS botched this and you can see it in that the scheduler was not updated at the same time. The scheduler absolutely must track if an ASM does a memory read of an arbitrary location. I'd have to dig deeper to see when this got fixed, but it was clearly botched. Many years later another pass which needs to precisely track such things came along, namely DSE. The code in DSE is actually easier to grok. First, if you look at the ASM handling in cfgexpand.c you'll find: if (j == -4) /* `memory', don't cache memory across asm */ { XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); continue; } So we generate (CLOBBER (MEM:BLK (SCRATCH))) when we see memory in the clobber list of an ASM. If you then look at dse.c we have this in record_store: /* At this point we know mem is a mem. */ if (GET_MODE (mem) == BLKmode) { if (GET_CODE (XEXP (mem, 0)) == SCRATCH) { if (dump_file (dump_flags TDF_DETAILS)) fprintf (dump_file, adding wild read for (clobber (mem:BLK (scratch))\n); add_wild_read (bb_info); insn_info-cannot_delete = true; return 0; } Which says very precisely that we treat (CLOBBER (MEM:BLK (SCRATCH))) as potentially *reading* any location. 
If you trace through how the scheduler builds dependencies, paying particular attention to alias.c you'll see that (CLOBBER (MEM:BLK (SCRATCH))) is treated as both a read and a write of an arbitrary location. It's unfortunate that RMS put the memory tag in the clobber list. But he really wasn't a compiler junkie and didn't realize the right thing to do was to have a memory tag in both the inputs and [output|clobber] section to represent a read of an arbitrary location and a write to an arbitrary location independently. But it is what it is at this point and we have to treat memory appearing in the clobber list as an arbitrary memory read and an arbitrary memory write. Jeff
[PATCH, nds32] Committed: Remove some features that are not available yet in nds32 port of GNU binutils package.
Hi, all, The nds32 target supports two features, fp-as-gp and ex9, designed for code size optimizations. They are majorly performed by linker so that compiler is merely to give some hints or directives with -mforce-fp-as-gp, -mforbid-fp-as-gp, and -mex9 options. However, those two features are not available yet in the current nds32 port of GNU binutils package. For consistency concern, I think it would be better to remove them from gcc trunk as well for now. Committed as Rev. 219576: https://gcc.gnu.org/r219576 gcc/ChangeLog 2015-01-14 Chung-Ju Wu jasonw...@gmail.com * config/nds32/nds32.opt (mforce-fp-as-gp): Remove. (mforbid-fp-as-gp): Remove. (mex9): Remove. * config/nds32/nds32-fp-as-gp.c (nds32_have_prologue_p): Remove. (nds32_symbol_load_store_p): Remove. (nds32_fp_as_gp_check_available): Clean up implementation. * config/nds32/nds32.h (LINK_SPEC): Remove -mforce-as-gp and -mex9 cases. * config/nds32/nds32.c (nds32_asm_file_start): No need to consider fp-as-gp and ex9 cases. Best regards, jasonwucj 0010-Remove-some-features-that-are-not-available-yet-in-n.patch Description: Binary data
Re: [committed] Update copyright years, part 2
Jakub Jelinek ja...@redhat.com writes: Patch too large to attach uncompressed, this has been created with update-copyright.py --this-year. Note, I had to temporarily move away gcc/jit/docs/conf.py, the python script dies on that and leaves almost all files unchanged. Thanks for doing the update. Is the patch below OK to fix the JIT thing? After this change, update-copyright.py --this-year seems to update gcc/jit correctly (including the texinfo files). Richard contrib/ * update-copyright.py (Copyright.__init__): Add a regexp for copyright = u'. (Copyright.update_copyright): Don't add a space before the year in that case. Index: contrib/update-copyright.py === --- contrib/update-copyright.py 2014-08-05 10:29:02.695491816 +0100 +++ contrib/update-copyright.py 2015-01-13 14:13:43.500812967 + @@ -183,6 +183,7 @@ class Copyright: '|[Cc]opyright\s+%s' '|[Cc]opyright\s+copy;' '|[Cc]opyright\s+@copyright{}' +'|copyright = u\'' '|@set\s+copyright[\w-]+)' # 2: the years. Include the whitespace in the year, so that @@ -363,7 +364,8 @@ class Copyright: return (False, orig_line, next_line) line = (line[:match.start (2)] -+ ' ' + canon_form + self.separator ++ ('' if intro.startswith ('copyright = ') else ' ') ++ canon_form + self.separator + line[match.end (2):]) # Use the standard (C) form.
Re: shift/extract SHIFT_COUNT_TRUNCATED combine bug
On Tue, Jan 13, 2015 at 10:51:27AM +0100, Richard Biener wrote: IMHO SHIFT_COUNT_TRUNCATED should be removed and instead backends should provide shift patterns with a (and:QI ...) for the shift amount which simply will omit that operation if suitable. Note that that catches less though, e.g. in int f(int x, int n) { return x ((2*n) 31); } without SHIFT_COUNT_TRUNCATED it will try to match an AND with 30, not with 31. Segher
[PATCH][AArch64 Intrinsics] Replace temporary assembler for vst1_lane
Nowadays, just storing the (bigendian-corrected) vector element to the address, generates exactly the same assembler for all cases except {float,int,uint}64x1_t, where st1 {v0.d}[0], [x0] becomes str d0, [x0] This is not a problem, and the change will be much better for optimization through the midend, as well as making use of previous improvements in error reporting. Also move the /* vst1q */ comment, which was a couple intrinsics too late. gcc/ChangeLog: * config/aarch64/arm_neon.h (vst1_lane_f32, vst1_lane_f64, vst1_lane_p8, vst1_lane_p16, vst1_lane_s8, vst1_lane_s16, vst1_lane_s32, vst1_lane_s64, vst1_lane_u8, vst1_lane_u16, vst1_lane_u32, vst1_lane_u64, vst1q_lane_f32, vst1q_lane_f64, vst1q_lane_p8, vst1q_lane_p16, vst1q_lane_s8, vst1q_lane_s16, vst1q_lane_s32, vst1q_lane_s64, vst1q_lane_u8, vst1q_lane_u16, vst1q_lane_u32, vst1q_lane_u64): Reimplement with pointer dereference and __aarch64_vget_lane_any. Cross-tested check-gcc on aarch64-none-elf and aarch64_be-none-elf. Ok for trunk? Cheers, Alancommit 926aec661699e52f617f16068075ef0242a43609 Author: Alan Lawrence alan.lawre...@arm.com Date: Thu Dec 11 17:29:54 2014 + Replace temporary inline assembler for vst1_lane, move /* vst1q */ comment. 
Note for (float|u?int)64x1 vectors, st1 {v0.d}[0], [x0] becomes str d0, [x0] diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 3d1bcd5..980490f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10304,272 +10304,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) result; \ }) -#define vst1_lane_f32(a, b, c) \ - __extension__ \ -({ \ - float32x2_t b_ = (b);\ - float32_t * a_ = (a);\ - __asm__ (st1 {%1.s}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_f64(a, b, c) \ - __extension__ \ -({ \ - float64x1_t b_ = (b);\ - float64_t * a_ = (a);\ - __asm__ (st1 {%1.d}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_p8(a, b, c) \ - __extension__ \ -({ \ - poly8x8_t b_ = (b); \ - poly8_t * a_ = (a); \ - __asm__ (st1 {%1.b}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_p16(a, b, c) \ - __extension__ \ -({ \ - poly16x4_t b_ = (b); \ - poly16_t * a_ = (a); \ - __asm__ (st1 {%1.h}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_s8(a, b, c) \ - __extension__ \ -({ \ - int8x8_t b_ = (b); \ - int8_t * a_ = (a); \ - __asm__ (st1 {%1.b}[%2],[%0] \ -: \ -
Re: [committed] Update copyright years, part 2
On Tue, Jan 13, 2015 at 05:06:35PM +, Richard Sandiford wrote: Jakub Jelinek ja...@redhat.com writes: Patch too large to attach uncompressed, this has been created with update-copyright.py --this-year. Note, I had to temporarily move away gcc/jit/docs/conf.py, the python script dies on that and leaves almost all files unchanged. Thanks for doing the update. Is the patch below OK to fix the JIT thing? After this change, update-copyright.py --this-year seems to update gcc/jit correctly (including the texinfo files). Richard contrib/ * update-copyright.py (Copyright.__init__): Add a regexp for copyright = u'. (Copyright.update_copyright): Don't add a space before the year in that case. Ok, thanks. --- contrib/update-copyright.py 2014-08-05 10:29:02.695491816 +0100 +++ contrib/update-copyright.py 2015-01-13 14:13:43.500812967 + @@ -183,6 +183,7 @@ class Copyright: '|[Cc]opyright\s+%s' '|[Cc]opyright\s+copy;' '|[Cc]opyright\s+@copyright{}' +'|copyright = u\'' '|@set\s+copyright[\w-]+)' # 2: the years. Include the whitespace in the year, so that @@ -363,7 +364,8 @@ class Copyright: return (False, orig_line, next_line) line = (line[:match.start (2)] -+ ' ' + canon_form + self.separator ++ ('' if intro.startswith ('copyright = ') else ' ') ++ canon_form + self.separator + line[match.end (2):]) # Use the standard (C) form. Jakub
Re: [PATCH] [AArch64, NEON] Improve vpmaxX vpminX intrinsics
On 09/12/14 08:17, Yangfei (Felix) wrote: On 28 November 2014 at 09:23, Yangfei (Felix) felix.y...@huawei.com wrote: Hi, This patch converts vpmaxX vpminX intrinsics to use builtin functions instead of the previous inline assembly syntax. Regtested with aarch64-linux-gnu on QEMU. Also passed the glorious testsuite of Christophe Lyon. OK for the trunk? Hi Felix, We know from experience that the advsimd intrinsics tend to be fragile for big endian and in general it is fairly easy to break the big endian case. For these advsimd improvements that you are working on (that we very much appreciate) it is important to run both little endian and big endian regressions. Thanks /Marcus Okay. Any plan for the advsimd big-endian improvement? I rebased this patch over Alan Lawrance's patch: https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00279.html No regressions for aarch64_be-linux-gnu target too. OK for the thunk? Index: gcc/ChangeLog === --- gcc/ChangeLog (revision 218464) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-12-09 Felix Yang felix.y...@huawei.com + + * config/aarch64/aarch64-simd.md (aarch64_maxmin_unspmode): New + pattern. + * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, + uminp, smax_nanp, smin_nanp): New builtins. 
+ * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, + vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, + vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, + vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, + vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, + vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, + vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, + vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64, + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) Index: gcc/config/aarch64/aarch64-simd.md === --- gcc/config/aarch64/aarch64-simd.md (revision 218464) +++ gcc/config/aarch64/aarch64-simd.md (working copy) @@ -1017,6 +1017,28 @@ DONE; }) +;; Pairwise Integer Max/Min operations. +(define_insn aarch64_maxmin_unspmode + [(set (match_operand:VDQ_BHSI 0 register_operand =w) + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 register_operand w) +(match_operand:VDQ_BHSI 2 register_operand w)] + MAXMINV))] + TARGET_SIMD + maxmin_uns_opp\t%0.Vtype, %1.Vtype, %2.Vtype + [(set_attr type neon_minmaxq)] +) + Hi Felix, Sorry for the delay in getting back to you on this. If you've rolled aarch64_reduc_maxmin_uns_internalv2si into the above pattern, do you still need it? For all its call points, just point them to aarch64_maxmin_unspmode? Thanks, Tejas.
Re: [PATCH] add option to emit more array bounds warnigs
Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... * gcc/tree-vrp.c (check_array_ref): Emit more warnings for warn_array_bounds = 2. * gcc/testsuite/gcc.dg/Warray-bounds-11.c: New test-case. * gcc/c-family/c.opt: New option -Warray-bounds=. * gcc/common.opt: New option -Warray-bounds=. * gcc/doc/invoke.texi: Document new option. Has this patch been bootstrapped and regression tested? If so, on what platform? x86_64-unknown-linux-gnu Given the new warnings (as implemented by the patch) are not enabled by default, I'm inclined to approve once Martin verifies things via bootstrap and regression test. Thank you, Martin
[PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Hi, The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Ok to commit? Many thanks, Andrew testsuite/ * gcc.target/mips/call-saved-4.c: Add NO_COMPRESSION attribute. * gcc.target/mips/call-saved-5.c: Likewise. * gcc.target/mips/call-saved-6.c: Likewise. diff --git a/gcc/testsuite/gcc.target/mips/call-saved-4.c b/gcc/testsuite/gcc.target/mips/call-saved-4.c index 846ea32..92881c4 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-4.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-4.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init (); diff --git a/gcc/testsuite/gcc.target/mips/call-saved-5.c b/gcc/testsuite/gcc.target/mips/call-saved-5.c index 2937b31..152b28f 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-5.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-5.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init (); diff --git a/gcc/testsuite/gcc.target/mips/call-saved-6.c b/gcc/testsuite/gcc.target/mips/call-saved-6.c index 0d1a4c8..a384d4a 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-6.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-6.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init ();
RE: [PATCH] Fix for PR64081 in RTL loop unroller
Is it really sufficient here to verify that all the defs are on latch predecessors, what about the case where there is a predecessor without a def. How do you guarantee domination in that case? ISTM that given the structure for the code you're writing that you'd want to verify that in the event of multiple definitions that all of them appear on immediate predecessors of the latch *and* that each immediate predecessor has a definition. Yes, do you think it's better to check exactly immediate predecessors? - - if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))) - return false; + { + def_num++; + if (!(def_pred_latch = def_pred_latch_p (adef)) + || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)), Whitespace nit here. Whitespace goes before the open paren for the function call, not after. Thanks for catching this! @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def) static enum iv_grd_result iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) And in this routine, you appear to do both checks. ie, each def is on an immediate predecessor and each immediate predecessor has a def. Is there some reason why iv_get_reaching_def has the stronger check while latch_dominating_def does not? Looks like I was sure that latch_dominating_def always goes after iv_get_reaching_def but now I see it is not true. Will add another check in latch_dominating_def. Thanks, Igor jeff
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. Maciej
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 09:18, Jakub Jelinek wrote: Hi! My PR60663 fix unfortunately stopped CSE of all inline-asms, even when they e.g. only have the clobbers added by default. This patch attempts to restore the old behavior, with the exceptions: 1) as always, asm volatile is not CSEd 2) inline-asm with multiple outputs are not CSEd 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; this can be removed by removing the int j, lim = XVECLEN (x, 0); and loop below it 4) inline-asm with clobbers is never copied into an insn that wasn't inline-asm before, so if there are clobbers, we allow CSEing of e.g. two same inline-asms, but only by reusing results of one of those Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with arm cross after reverting the PR60663 arm cost fix. Ok for trunk this way, or with 3) removed? 2015-01-13 Jakub Jelinek ja...@redhat.com PR rtl-optimization/63637 PR rtl-optimization/60663 * cse.c (merge_equiv_classes): Set new_elt-cost to MAX_COST if elt-cost is MAX_COST for ASM_OPERANDS. (find_sets_in_insn): Fix up comment typo. (cse_insn): Don't set src_volatile for all non-volatile ASM_OPERANDS in PARALLELs, but just those with multiple outputs or with memory clobber. Set elt-cost to MAX_COST for ASM_OPERANDS in PARALLEL. Set src_elt-cost to MAX_COST if new_src is ASM_OPERANDS and elt-cost is MAX_COST. * gcc.dg/pr63637-1.c: New test. * gcc.dg/pr63637-2.c: New test. * gcc.dg/pr63637-3.c: New test. * gcc.dg/pr63637-4.c: New test. * gcc.dg/pr63637-5.c: New test. * gcc.dg/pr63637-6.c: New test. * gcc.target/i386/pr63637-1.c: New test. * gcc.target/i386/pr63637-2.c: New test. * gcc.target/i386/pr63637-3.c: New test. * gcc.target/i386/pr63637-4.c: New test. * gcc.target/i386/pr63637-5.c: New test. * gcc.target/i386/pr63637-6.c: New test. 
--- gcc/cse.c.jj2015-01-09 21:59:44.0 +0100 +++ gcc/cse.c 2015-01-13 13:26:23.391216064 +0100 @@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c } new_elt = insert (exp, class1, hash, mode); new_elt-in_memory = hash_arg_in_memory; + if (GET_CODE (exp) == ASM_OPERANDS elt-cost == MAX_COST) + new_elt-cost = MAX_COST; } } } @@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc { int i, lim = XVECLEN (x, 0); - /* Go over the epressions of the PARALLEL in forward order, to + /* Go over the expressions of the PARALLEL in forward order, to put them in the same order in the SETS array. */ for (i = 0; i lim; i++) { @@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn) REGNO (dest) = FIRST_PSEUDO_REGISTER) sets[i].src_volatile = 1; - /* Also do not record result of a non-volatile inline asm with -more than one result or with clobbers, we do not want CSE to -break the inline asm apart. */ else if (GET_CODE (src) == ASM_OPERANDS GET_CODE (x) == PARALLEL) - sets[i].src_volatile = 1; + { + /* Do not record result of a non-volatile inline asm with +more than one result. */ + if (n_sets 1) + sets[i].src_volatile = 1; + + int j, lim = XVECLEN (x, 0); + for (j = 0; j lim; j++) + { + rtx y = XVECEXP (x, 0, j); + /* And do not record result of a non-volatile inline asm +with memory clobber. */ + if (GET_CODE (y) == CLOBBER MEM_P (XEXP (y, 0))) Can you please add a comment here which references the full form of the memory tag. (clobber (mem:BLK (scratch))). If we ever have to look at this again (say perhaps to break out the read anything vs write anything into separate tags :-) it'll save considerable time and angst trying to track all this stuff down. The tests you've got are a step forward, but there's obviously a lot more we could do. For example testing DSE around ASMs without and without a memory clobber, testing CSE of unrelated memory references around an ASM without and without a memory clobber come to mind. 
You don't have to add them to get approval, but if you were to take the time to cobble them together it'd be hugely appreciated. Given the discussion with Segher, let's give him a chance to chime in on tonight's messages before we make a final decision. jeff
Re: [PATCH] add option to emit more array bounds warnings
On 01/13/15 17:40, Martin Uecker wrote: Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( I fixed up the ChangeLog entries and installed the patch for you. If you plan to contribute regularly, you should go ahead and apply for write access to the repository so that you'll be able to commit your own patches once they're approved. You'll also need to make sure you have an assignment on file with the FSF. That patch was pretty small (the testcase was larger than the patch itself, which I always like :-) so I didn't request an assignment. Further submissions likely will require an assignment. Thanks, jeff
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/15 15:56, Andrew MacLeod wrote: On 01/13/2015 02:06 PM, Andrew MacLeod wrote: On 01/13/2015 01:38 PM, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. 
(It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. I imagine it was probably in a previous incarnation of the standard... Most of this was actually implemented based on very early draft standards years and years ago and never revised. It wasn't put in by me unless the standard at some point said had such wording. The current standard appears to make no mention of the situation. It seems that it should be safe to move back to the original patch, and remove that error test for using consume on an exchange... Andrew Here's the original patch along with the line removed from the testcase. x86_64-unknown-linux-gnu bootstraps, no regressions, and so forth. OK for trunk? -ENOPATCH However, I can get it from the BZ and it's OK assuming you also fixup the one testcase we've discussed on this thread. Jeff
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
On 01/10/15 06:05, Richard Sandiford wrote: Sorry for the slow response. Jeff has approved the patch in the meantime, but I didn't want to go ahead and apply it while there was still disagreement... Thanks. I didn't realize there was a disagreement when I approved. Let's continue to hash this out a bit in the hopes that we can all get to a place where we're comfortable with the final change, whatever it happens to me. jeff
Re: [PATCH] [AArch64, NEON] Improve vpmaxX vpminX intrinsics
On 09/12/14 08:17, Yangfei (Felix) wrote: On 28 November 2014 at 09:23, Yangfei (Felix) felix.y...@huawei.com wrote: Hi, This patch converts vpmaxX vpminX intrinsics to use builtin functions instead of the previous inline assembly syntax. Regtested with aarch64-linux-gnu on QEMU. Also passed the glorious testsuite of Christophe Lyon. OK for the trunk? Hi Felix, We know from experience that the advsimd intrinsics tend to be fragile for big endian and in general it is fairly easy to break the big endian case. For these advsimd improvements that you are working on (that we very much appreciate) it is important to run both little endian and big endian regressions. Thanks /Marcus Okay. Any plan for the advsimd big-endian improvement? I rebased this patch over Alan Lawrance's patch: https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00279.html No regressions for aarch64_be-linux-gnu target too. OK for the trunk? Index: gcc/ChangeLog = == --- gcc/ChangeLog (revision 218464) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-12-09 Felix Yang felix.y...@huawei.com + + * config/aarch64/aarch64-simd.md (aarch64_maxmin_unspmode): New + pattern. + * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, + uminp, smax_nanp, smin_nanp): New builtins.
+ * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, + vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, + vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, + vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, + vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, + vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, + vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, + vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, + vpminnmqd_f64, + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) Index: gcc/config/aarch64/aarch64-simd.md = == --- gcc/config/aarch64/aarch64-simd.md (revision 218464) +++ gcc/config/aarch64/aarch64-simd.md (working copy) @@ -1017,6 +1017,28 @@ DONE; }) +;; Pairwise Integer Max/Min operations. +(define_insn aarch64_maxmin_unspmode + [(set (match_operand:VDQ_BHSI 0 register_operand =w) + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 register_operand w) +(match_operand:VDQ_BHSI 2 register_operand w)] + MAXMINV))] + TARGET_SIMD + maxmin_uns_opp\t%0.Vtype, %1.Vtype, %2.Vtype + [(set_attr type neon_minmaxq)] +) + Hi Felix, Sorry for the delay in getting back to you on this. If you've rolled aarch64_reduc_maxmin_uns_internalv2si into the above pattern, do you still need it? For all its call points, just point them to aarch64_maxmin_unspmode? Thanks, Tejas. Hello Tejas, I didn't do this yet. Currently the aarch64_reduc_maxmin_uns_internalv2si is only called by reduc_maxmin_uns_scal_mode. I find it kind of trouble to handle this due to the use of iterators in the caller pattern. Are you going to rework this part?
Re: [PATCH] Correct target selector in -mfentry tests
On 01/13/15 14:27, H.J. Lu wrote: -fprofile -mfentry works with PIE if gcrt1.o is compiled with -fPIC. A glibc has been filed, PR 17836, and a glibc patch has been submitted. OK for trunk? Thanks. H.J. -- * gcc.target/i386/fentry-override.c: Properly place {} in target selector. Remove nonpic. * gcc.target/i386/fentry.c: Likewise. Does this change the pass/fail result of the test on a system without an updated glibc? jeff
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Matthew Fortune wrote: I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Taking care that the default compilation mode does not conflict (e.g. MIPS16, incompatible) and taking any exceptions into account (e.g. n64, unsupported) I presume, right? mips.exp sorts that out for you. Adding -mmicromips or (-micromips) to dg-options forces (or at least is supposed to force) the overall flags to be compatible with microMIPS. The aim of mips.exp is to avoid skipping tests wherever possible. If someone runs the testsuite with -mips16 and we have a -micromips test, it's better to remove -mips16 for that test than to skip the test entirely. I was going to suggest a follow up patch to add copies of the three tests as Richard suggests. I haven't yet done a micromips run of the testsuite to check for any other issues like this but I suspect problems are limited to the tests that I recently added. Please always try to test changes reasonably, i.e. at least o32, o32/MIPS16, o32/microMIPS, n32, n64, and then Linux and ELF if applicable, plus any options that may be relevant, unless it is absolutely clear ABI/ISA variations do not matter for a change proposed. TBH this seems a bit much.
On the one hand it's more testing than you'd get for almost any other target, but on the other it leaves out important differences like MIPS I vs MIPS II vs MIPS 32, MIPS III vs MIPS IV vs MIPS64, r1 vs. r2 vs. r6, Octeon vs. Loongson vs. vanilla, DSP vs. no DSP, etc. I think we just have to accept that there are so many possible combinations that we can't test everything that's potentially relevant. I think it's more useful to be flexible than prescribe a particular list. Having everyone test the same multilib combinations on the same target isn't necessarily a good thing anyway. Diversity in testing (between developers) is useful too. Thanks, Richard
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
On 01/13/15 11:55, Eric Botcazou wrote: (1) we have a non-paradoxical subreg; (2) both (reg:ymode xregno) and (reg:xmode xregno) occupy full hard registers (no padding or unused upper bits); (3) (reg:ymode xregno) and (reg:xmode xregno) store the same number of bytes (X) in each constituent hard register; (4) the offset is a multiple of X, i.e. the data we're accessing is aligned to a register boundary; and (5) endianness is regular (no differences between words and bytes, or between registers and memory) OK, that's a nice translation of the new code. :-) It seems to me that the patch wants to extend the support of generic subregs to modes whose sizes are not multiple of each other, which is a requirement of the existing code, but does that in a very specific case for the sake of the ARM port without saying where all the above restrictions come from. Basically we're lifting the restriction that the the sizes are multiples of each other. The requirements above are the set where we know it will work. They are target independent, but happen to match what the ARM needs. The certainly do short circuit the meat of the function, that's the whole point, there's this set of conditions under which we know this will work and when they hold, we bypass. Now one could argue that instead of bypassing we should put the code to handle this situation further down. I'd be leery of doing that just from a complexity standpoint. But one could also argue that short circuiting like the patch does adds complexity as well and may be a bit kludgy. Maybe the way forward here is for someone to try and integrate this support in the main part of the code and see how it looks. Then we can pick one. The downside is since this probably isn't a regression that work would need to happen quickly to make it into gcc-5. 
Which leads to another option, get the release managers to sign off on the kludge after gcc-5 branches and only install the kludge on the gcc-5 branch and insisting the other solution go in for gcc-6 and beyond. Not sure if they'd do that, but it's a discussion that could happen. jeff
Re: [PATCH] add option to emit more array bounds warnings
Jeff Law l...@redhat.com: On 01/13/15 17:40, Martin Uecker wrote: Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( I fixed up the ChangeLog entries and installed the patch for you. Thank you, Jeff! If you plan to contribute regularly, you should go ahead and apply for write access to the repository so that you'll be able to commit your own patches once they're approved. I put a request in with you as sponsor (hope this is ok). You'll also need to make sure you have an assignment on file with the FSF. That patch was pretty small (the testcase was larger than the patch itself, which I always like :-) so I didn't request an assignment. Further submissions likely will require an assignment. I already have an assignment on file. Martin
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 01:38 PM, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. 
However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. I imagine it was probably in a previous incarnation of the standard... Most of this was actually implemented based on very early draft standards years and years ago and never revised. It wasn't put in by me unless the standard at some point said had such wording. The current standard appears to make no mention of the situation. It seems that it should be safe to move back to the original patch, and remove that error test for using consume on an exchange... Andrew
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On Tue, Jan 13, 2015 at 12:45:27PM -0700, Jeff Law wrote: On 01/13/15 09:38, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 05:18:19PM +0100, Jakub Jelinek wrote: 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; As extend.texi used to say: If your assembler instructions access memory in an unpredictable fashion, add @samp{memory} to the list of clobbered registers. This causes GCC to not keep memory values cached in registers across the assembler instruction and not optimize stores or loads to that memory. You also should add the @code{volatile} keyword if the memory affected is not listed in the inputs or outputs of the @code{asm}, as the @samp{memory} clobber does not count as a side-effect of the @code{asm}. so a memory clobber in a non-volatile asm should not prevent CSE. My reading of that paragraph is somewhat different. The key here is the memory clobber affects optimization of instructions around the asm while the volatile specifier affects the optimization of the ASM itself. A memory clobber must inhibit CSE of memory references on either side of the asm because the asm must be assumed to read or write memory in unpredictable ways. The volatile specifier tells the compiler that the asm itself must be preserved, even if dataflow shows the outputs as not used. That is not necessarily in conflict. My reading of Jeff's comment is that in int a; int foo (void) { int b, c, d, e; b = a; asm (... : =r (c) : : memory); d = a; asm (... : =r (e) : : memory); return b + d + 2 * (c + e); } we are not allowed to CSE d = a; into d = b;. CSE invalidate_from_clobbers should ensure that already, even when we don't do anything special about memory clobber in the patch. 
Another thing is if there is a store in between the two non-volatile asms with memory clobber, here I'm not sure if with the alternate patch we'd treat the memory clobber as use of everything previously stored into memory (in this regard the posted version is safe). And finally there is the case of non-volatile asm with memory clobber with no memory stores in between the two - the posted (safer) patch will not allow to CSE the two, while in theory we could CSE them into just one asm. Jakub
Re: PR54442 build_qualified_type produces a non-canonical type
Hi, On 06/09/2014 04:46 PM, Jason Merrill wrote: On 06/09/2014 10:32 AM, Marc Glisse wrote: On Mon, 9 Jun 2014, Jason Merrill wrote: On 06/09/2014 10:18 AM, Marc Glisse wrote: I doubt the patch can be wrong, but it may be that this is a situation that is not supposed to happen and should be fixed elsewhere? Seems likely. What is the difference between the type returned from build_qualified_type (TYPE_CANONICAL and it's TYPE_CANONICAL? I would expect them to be the same. throws tree_list 0x7660e5c8 purpose integer_cst 0x764d6ba0 constant 1 (in what build_qualified_type returns) I guess that makes sense, given that the exception specification isn't really part of the type. The patch is OK. In fact, I noticed today that this is a 4.8/4.9 Regression too. Shall I try to apply the patchlet to 4_9-branch too and, if testing passes, commit there and close the bug? Thanks, Paolo.
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/15 11:38, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. 
However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_excahnge I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. AFAICT that test has been there since the initial commit of sync-mem-invalid.c (which was later renamed to atomic-invalid). In fact, that was the only test initially in sync-mem-invalid.c commit 64d1dbf10e3f08305f4a8569e27fc2224f9074d2 Author: amacleod amacleod@138bc75d-0d04-0410-961f-82ee72b054a4 Date: Thu Jun 23 13:09:31 2011 + Basica tests for __sync_mem_exchange and framework for further additions. * lib/target-support.exp (check_effective_target_sync_int_128, check_effective_target_sync_long_long): Check whether the target supports 64 and 128 bit __sync builtins. * gcc.dg/sync-mem.h: New. Common code to check memory model __syncs. * gcc.dg/sync-mem-1.c: New. Check char size. * gcc.dg/sync-mem-2.c: New. Check short size. * gcc.dg/sync-mem-3.c: New. Check int size. * gcc.dg/sync-mem-4.c: New. Check long long. * gcc.dg/sync-mem-5.c: New. Check 128 bit. * gcc.dg/sync-mem-invalid.c: New. Check invalid memory modes. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/cxx-mem-model@175331 138bc75d-0d04-0410-961f-82ee72b054a4 Mostly hoping this refreshes Andrew's memory and he can provide some insight on why we test this particular combination and consider it invalid. I was kind of hoping that we'd track this down to something like a particular target didn't support this capability with the old sync builtins and we carried it into the atomics when we made that switch. I don't have a vested interest in either approach. I just want to see us DTRT. jeff
Re: [PATCH] Fix ICE with -fgnu-tm and pragma ivdep (PR middle-end/64391)
On 01/13/15 09:28, Marek Polacek wrote: We ICE on this testcase, because the usage of #pragma GCC ivdep pulls in the ANNOTATE internal functions which don't have underlying fndecls, hence we segv on a NULL_TREE. This patch makes get_attrs_for be prepared for such a scenario. The callers of get_attrs_for already check for NULL_TREE. I don't think internal fns can have transaction_* attributes anyway. While at it, I did some cleanups. Bootstrapped/regtested on {ppc64,x86_64}-linux, ok for trunk? 2015-01-13 Marek Polacek pola...@redhat.com PR middle-end/64391 * trans-mem.c (get_attrs_for): Return NULL_TREE if X is NULL_TREE. * gcc.dg/tm/pr64391.c: New test. OK. I looked briefly at perhaps catching this earlier in the call chain, but your approach looks best to me. Thanks, Jeff
Re: [testsuite] PATCH: Add check_effective_target_pie
On 01/13/15 05:52, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be built with -fPIC else you get an error like that one above and it is the same problems when you build bin with -fPIE and link with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? That is a good question. We can compile gcrt1.o with -fPIC and it will work with both -pg and -pg -pie. I will open a glibc bug. Thanks for getting the bug opened, there's a reasonable chance that we'll have the gcrt1.o we want in the not too distant future. Here is the updated patch without the check_profiling_available change. OK for trunk? Thanks. H.J. --- Subject: [PATCH 1/5] Add check_effective_target_pie Hi, This patch adds check_effective_target_pie to check if the current multilib generates PIE by default. Thanks. H.J. --- 2015-01-11 H.J. Lu hongjiu...@intel.com * gcc.target/i386/pie.c: New test. * lib/target-supports.exp (check_effective_target_pie): New. OK. Jeff
Re: [testsuite] PATCH: Check if -pg available
On 01/13/15 05:54, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? OK with a suitable ChangeLog entry. jeff
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
Sorry for the slow response. Jeff has approved the patch in the meantime, but I didn't want to go ahead and apply it while there was still disagreement... I still think that it isn't appropriate to short-circuit the main computation as the patch does, but I don't want to block it after Jeff's approval. (1) we have a non-paradoxical subreg; (2) both (reg:ymode xregno) and (reg:xmode xregno) occupy full hard registers (no padding or unused upper bits); (3) (reg:ymode xregno) and (reg:xmode xregno) store the same number of bytes (X) in each constituent hard register; (4) the offset is a multiple of X, i.e. the data we're accessing is aligned to a register boundary; and (5) endianness is regular (no differences between words and bytes, or between registers and memory) OK, that's a nice translation of the new code. :-) It seems to me that the patch wants to extend the support of generic subregs to modes whose sizes are not multiple of each other, which is a requirement of the existing code, but does that in a very specific case for the sake of the ARM port without saying where all the above restrictions come from. -- Eric Botcazou
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Wherever possible gcc.target/mips should not have conditional dg-finals. Thanks, Richard
RE: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Richard Sandiford rdsandif...@googlemail.com writes: Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Whereever possible gcc.target/mips should not have conditional dg- finals. I was going to suggest a follow up patch to add copies of the three tests as Richard suggests. I haven't yet done a micromips run of the testsuite to check for any other issues like this but I suspect problems are limited to the tests that I recently added. I certainly agree that we shouldn't just ignore micromips expected output given it is pretty easy to test. Please go ahead and commit this patch so we clean up the test results for GCC 5 in case you (or anyone else) doesn't get to submitting the extra test cases before we hit stage 4. Thanks, Matthew
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). 
It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that.
Re: [PATCH] Fix for PR64081 in RTL loop unroller
On 01/13/15 11:01, Zamyatin, Igor wrote: Is it really sufficient here to verify that all the defs are on latch predecessors, what about the case where there is a predecessor without a def. How do you guarantee domination in that case? ISTM that given the structure for the code you're writing that you'd want to verify that in the event of multiple definitions that all of them appear on immediate predecessors of the latch *and* that each immediate predecessor has a definition. Yes, do you think it's better to check exactly immediate predecessors? I'd use the same structure that you have in iv_get_reaching_def. If there was a reasonable way to factor that test into a single function and call it from both places that would be even better. Jeff
Re: [PATCH] add option to emit more array bounds warnings
On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... * gcc/tree-vrp.c (check_array_ref): Emit more warnings for warn_array_bounds = 2. * gcc/testsuite/gcc.dg/Warray-bounds-11.c: New test-case. * gcc/c-family/c.opt: New option -Warray-bounds=. * gcc/common.opt: New option -Warray-bounds=. * gcc/doc/invoke.texi: Document new option. Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. Thanks, Jeff
Re: [PATCH] Fix REE for vector modes (PR rtl-optimization/64286, take 2)
On 01/13/15 09:11, Jakub Jelinek wrote: On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote: On 01/12/15 12:59, Jakub Jelinek wrote: Hi! As mentioned in the PR, giving up for all vector mode extensions is unnecessary, but unlike scalar integer extensions, where the low part of the extended value is the original value, for vectors this is not true, thus the old value is lost. Which means we can perform REE, but only if all uses of the definition are the same (code+mode) extension. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-12 Jakub Jelinek ja...@redhat.com PR rtl-optimization/64286 * ree.c (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. Does it make sense to remove your change for 59754 in combine_reaching_defs? Shouldn't this patch handle that case as well? You're right, this patch handles that too. New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-13 Jakub Jelinek ja...@redhat.com PR rtl-optimization/64286 * ree.c (combine_reaching_defs): Move part of comment earlier, remove !SCALAR_INT_MODE_P check. (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. OK. Thanks for taking care of this. I can't seem to find time for doing any real debugging or bugfixing. jeff
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 09:38, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 05:18:19PM +0100, Jakub Jelinek wrote: 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; As extend.texi used to say: If your assembler instructions access memory in an unpredictable fashion, add @samp{memory} to the list of clobbered registers. This causes GCC to not keep memory values cached in registers across the assembler instruction and not optimize stores or loads to that memory. You also should add the @code{volatile} keyword if the memory affected is not listed in the inputs or outputs of the @code{asm}, as the @samp{memory} clobber does not count as a side-effect of the @code{asm}. so a memory clobber in a non-volatile asm should not prevent CSE. My reading of that paragraph is somewhat different. The key here is the memory clobber affects optimization of instructions around the asm while the volatile specifier affects the optimization of the ASM itself. A memory clobber must inhibit CSE of memory references on either side of the asm because the asm must be assumed to read or write memory in unpredictable ways. The volatile specifier tells the compiler that the asm itself must be preserved, even if dataflow shows the outputs as not used. Jeff
Re: [PATCH] add option to emit more array bounds warnings
Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( Martin
Re: Housekeeping work in backends.html
Eric Botcazou ebotca...@adacore.com writes: Some ports are missing (lm32, moxie, nios2, nvptx, rl78, rx) so the relevant maintainers are CCed (see 6.3.9 Anatomy of a Target Back End in the doc). I think I got this right | Characteristics Target | HMSLQNFICBD lqrcpfgmbdiates ---+ moxie | F g ds AG
Re: [patch 1/2][ARM]: New CPU support for Marvell Whitney
On 09/01/2015 19:22, Kyrill Tkachov wrote: Hi Xingxing, On 19/12/14 11:01, Xingxing Pan wrote: +/* Return true if vector element size is byte. */ Minor nit: two spaces after full stop and before */ Same in other places in the patch. +bool +marvell_whitney_vector_element_size_is_byte (rtx insn) +{ + if (GET_CODE (PATTERN (insn)) == SET) +{ + if ((GET_MODE (SET_DEST (PATTERN (insn))) == V8QImode) || + (GET_MODE (SET_DEST (PATTERN (insn))) == V16QImode)) + return true; +} + + return false; +} I see this is called from inside marvell-whitney.md. It seems to me that this function takes RTX insns. Can the type of this be strengthened to rtx_insn * ? Also, this should be refactored and written a bit more generally by checking for VECTOR_MODE_P and then GET_MODE_INNER for QImode, saving you the trouble of enumerating the different vector QI modes. + +/* Return true if INSN has shift operation but is not a shift insn. */ +bool +marvell_whitney_non_shift_with_shift_operand (rtx insn) Similar comment. Can this be strengthened to rtx_insn * ? Thanks, Kyrill +{ + rtx pat = PATTERN (insn); + + if (GET_CODE (pat) != SET) +return false; + + /* Is not a shift insn. */ + rtx rvalue = SET_SRC (pat); + RTX_CODE code = GET_CODE (rvalue); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return false; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, rvalue, ALL) +{ + /* Has shift operation. */ + RTX_CODE code = GET_CODE (*iter); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return true; +} + + return false; +} Hi Kyrill, Thanks for advice. Refactored patch is attached. -- Regards, Xingxing commit 3627056607b1e8604ac8d85ed44fdc7d3209cd3e Author: Xingxing Pan xxing...@marvell.com Date: Thu Dec 18 16:58:05 2014 +0800 2015-01-13 Xingxing Pan xxing...@marvell.com * config/arm/arm-cores.def: Add new core marvell-whitney. * config/arm/arm-protos.h: (marvell_whitney_vector_mode_qi): Declare. 
(marvell_whitney_inner_shift): Ditto. * config/arm/arm-tables.opt: Regenerated. * config/arm/arm-tune.md: Regenerated. * config/arm/arm.c (arm_marvell_whitney_tune): New structure. (arm_issue_rate): Add marvell_whitney. (marvell_whitney_vector_mode_qi): New function. (marvell_whitney_inner_shift): Ditto. * config/arm/arm.md: Include marvell-whitney.md. (generic_sched): Add marvell_whitney. (generic_vfp): Ditto. * config/arm/bpabi.h (BE8_LINK_SPEC): Add marvell-whitney. * config/arm/t-arm (MD_INCLUDES): Add marvell-whitney.md. * config/arm/marvell-whitney.md: New file. * doc/invoke.texi: Document marvell-whitney. diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 6fa5d99..26eb7ab 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -159,6 +159,7 @@ ARM_CORE(cortex-m7, cortexm7, cortexm7, 7EM, FL_LDSCHED, cortex_m7) ARM_CORE(cortex-m4, cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) ARM_CORE(cortex-m3, cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) ARM_CORE(marvell-pj4, marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) +ARM_CORE(marvell-whitney, marvell_whitney, marvell_whitney, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, marvell_whitney) /* V7 big.LITTLE implementations */ ARM_CORE(cortex-a15.cortex-a7, cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index fc45348..45001ae 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -231,6 +231,9 @@ extern void arm_order_regs_for_local_alloc (void); extern int arm_max_conditional_execute (); +extern bool marvell_whitney_vector_mode_qi (rtx_insn *insn); +extern bool marvell_whitney_inner_shift (rtx_insn *insn); + /* Vectorizer cost model implementation. 
*/ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..dc5f364 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -298,6 +298,9 @@ EnumValue Enum(processor_type) String(marvell-pj4) Value(marvell_pj4) EnumValue +Enum(processor_type) String(marvell-whitney) Value(marvell_whitney) + +EnumValue Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..c73c33c 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -31,6 +31,7 @@ cortexa15,cortexa17,cortexr4, cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, - marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53 +
Re: [PATCH 4/4] Wire X-Gene 1 up in the ARM (32bit) backend as a AArch32-capable core.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/ChangeLog-2014| 10 ++ gcc/config/arm/arm-cores.def | 1 + gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md| 3 ++- gcc/config/arm/arm.c | 22 ++ gcc/config/arm/arm.md | 11 +-- gcc/config/arm/bpabi.h| 2 ++ gcc/config/arm/t-arm | 1 + gcc/doc/invoke.texi | 3 ++- 9 files changed, 52 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog-2014 b/gcc/ChangeLog-2014 index dd49d7f..c3c62db 100644 --- a/gcc/ChangeLog-2014 +++ b/gcc/ChangeLog-2014 @@ -3497,6 +3497,16 @@ 63965. * config/rs6000/rs6000.c: Likewise. +2014-12-23 Philipp Tomsich philipp.toms...@theobroma-systems.com + + * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list. + Include xgene1.md. + * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1. + * config/arm/arm-cores.def (xgene1): New entry. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1. + 2014-11-22 Jan Hubicka hubi...@ucw.cz PR ipa/63671 diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index be125ac..fa13eb9 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -167,6 +167,7 @@ ARM_CORE(cortex-a17.cortex-a7, cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | /* V8 Architecture Processors */ ARM_CORE(cortex-a53, cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) ARM_CORE(cortex-a57, cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) +ARM_CORE(xgene1, xgene1,xgene1, 8A, FL_LDSCHED, xgene1) /* V8 big.LITTLE implementations */ ARM_CORE(cortex-a57.cortex-a53, cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..1392429 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -310,6 +310,9 @@ EnumValue Enum(processor_type) String(cortex-a57) Value(cortexa57) EnumValue +Enum(processor_type) 
String(xgene1) Value(xgene1) + +EnumValue Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) Enum diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..dcd5054 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -32,5 +32,6 @@ cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53 + cortexa53,cortexa57,xgene1, + cortexa57cortexa53 (const (symbol_ref ((enum attr_tune) arm_tune diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..14c8a87 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1903,6 +1903,25 @@ const struct tune_params arm_cortex_a57_tune = ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; +const struct tune_params arm_xgene1_tune = +{ + arm_9e_rtx_costs, + xgene1_extra_costs, + NULL,/* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true,/* Prefer LDRD/STRD. */ + {true, true},/* Prefer non short circuit. */ + arm_default_vec_cost, /* Vectorizer costs. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true, /* Prefer 32-bit encodings. */ + false, /* Prefer Neon for stringops. */ + 32 /* Maximum insns to inline memset. */ +}; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ @@ -27066,6 +27085,9 @@ arm_issue_rate (void) { switch (arm_tune) { +case xgene1: + return 4; + case cortexa15: case cortexa57: return 3; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index c61057f..a3cbf3b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -109,6 +109,11 @@ ;; given instruction does not shift one of its input operands. 
(define_attr shift (const_int 0)) +;; [For compatibility with AArch64 in pipeline models] +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr fp no,yes (const_string no)) + ; Floating Point
Re: [RFC PATCH Fortran] make enum_9/10.f90 testcases work under FreeBSD ARM
On Sun, Jan 11, 2015 at 9:55 PM, Andreas Tobler andreast-l...@fgznet.ch wrote: Hi, I have here a possible way to make the enum_9.f90 and the enum_10.f90 work under arm*-*-freebsd*. The solution for enum_9.f90 is straight forward. But the one for enum_10.f90 requires a reordering of the dg-additional-sources line. This I do not understand yet, but maybe one of you does. If I have the original ordering and change the dg-options to check on 'target arm_eabi' I get strange compilation errors: Any patches that convert arm*-*-linux* to arm_eabi can be considered as obvious as long as you test them on an EABI compliant platform which freebsd appears to be getting towards. I haven't looked too deeply about the other ordering issue you mention here. regards Ramana --- /build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../gfortran -B/build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../ -B/build/gcc/obj_gcc_armv6/armv6-unknown-freebsd11.0/./libgfortran/ -fno-diagnostics-show-caret -fdiagnostics-color=never ./enum_10.c -c -o arm_eabi89728.o arm_eabi89728.c^M gfortran: fatal error: cannot specify -o with -c, -S or -E with multiple files^M compilation terminated.^M --- The -c comes after the enum_10.c Attached the solution which makes the tests pass. I tested them under FreeBSD armv6-*-freebsd11.0 and x86_64-unknown-freebsd11.0. Also under x86_64-unknown-linux-gnu. All PASS. Would this be ok for trunk? TIA, Andreas 2015-01-11 Andreas Tobler andre...@gcc.gnu.org * gfortran.dg/enum_9.f90: Replace arm*-*-linux* with arm_eabi. * gfortran.dg/enum_10.f90: Likewise. Reorder dg-additional-sources.
Re: [PATCH 3/4] Change the type of the prefetch-instructions to 'prefetch'.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/config/aarch64/aarch64.md | 2 +- gcc/config/arm/types.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 1f6b1b6..98f4f30 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -391,7 +391,7 @@ return pftype[INTVAL(operands[1])][locality]; } - [(set_attr type load1)] + [(set_attr type prefetch)] ) (define_insn trap diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index d368446..088c21a 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -118,6 +118,7 @@ ; mvn_shift_reg inverting move instruction, shifted operand by a register. ; no_insnan insn which does not represent an instruction in the ;final output, thus having no impact on scheduling. +; prefetch a prefetch instruction ; rbit reverse bits. ; revreverse bytes. ; sdiv signed division. @@ -556,6 +557,7 @@ call,\ clz,\ no_insn,\ + prefetch,\ csel,\ crc,\ extend,\ Can you follow up with a patch that adjusts the prefetch insn attribute in the ARM backend and uses this consistently in all the pipeline descriptions (i.e. treat this equivalent to load1 and make sure the compiler builds for AArch32 afterwards) ? It would be complete to do the same for all the pipeline descriptions in the AArch64 backend too. Ramana
Re: [PATCH, aarch64] Add prefetch support
On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Yes that sounds ok to me. I was going to submit an update to thunderx.md file this week anyways. Thanks, Andrew Cheers /Marcus
Re: [PATCH, aarch64] Add prefetch support
Great. I should have an update patch-set ready tested later tonight. Best, Phil. On 13 Jan 2015, at 15:18, Andrew Pinski pins...@gmail.com wrote: On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Yes that sounds ok to me. I was going to submit an update to thunderx.md file this week anyways. Thanks, Andrew Cheers /Marcus
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 13/01/15 13:46, Marcus Shawcroft wrote: On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: --- gcc/config/aarch64/aarch64.md | 1 + gcc/config/arm/xgene1.md | 531 ++ 2 files changed, 532 insertions(+) create mode 100644 gcc/config/arm/xgene1.md diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 12e1054..1f6b1b6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -190,6 +190,7 @@ (include ../arm/cortex-a53.md) (include ../arm/cortex-a15.md) (include thunderx.md) +(include ../arm/xgene1.md Can we have a ChangeLog entry please. /Marcus And please update the copyright years in xgene1.md. R.
Re: [PATCH, aarch64] Add prefetch support
On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Cheers /Marcus
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? I opened a glibc bug: https://sourceware.org/bugzilla/show_bug.cgi?id=17836 and submitted a patch: https://sourceware.org/ml/libc-alpha/2015-01/msg00284.html H.J.
[PATCH] PR59448 - Promote consume to acquire
Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Andrew * builtins.c (memmodel_consume_fix) : New. Promote consume to acquire. (expand_builtin_atomic_exchange, expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load, expand_builtin_atomic_fetch_op, expand_builtin_atomic_clear, expand_builtin_atomic_test_and_set, expand_builtin_atomic_thread_fence, expand_builtin_atomic_signal_fence): Call memmodel_consume_fix. Index: builtins.c === *** builtins.c (revision 219462) --- builtins.c (working copy) *** get_memmodel (tree exp) *** 5368,5373 --- 5368,5382 return (enum memmodel) val; } + /* Workaround for Bugzilla 59448. GCC doesn't track consume properly, so +be conservative and promote consume to acquire. 
*/ + static void + memmodel_consume_fix (enum memmodel val) + { + if (val == MEMMODEL_CONSUME) + val = MEMMODEL_ACQUIRE; + } + /* Expand the __atomic_exchange intrinsic: TYPE __atomic_exchange (TYPE *object, TYPE desired, enum memmodel) EXP is the CALL_EXPR. *** expand_builtin_atomic_exchange (machine_ *** 5389,5394 --- 5398,5405 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_compare_exchange ( *** 5434,5439 --- 5445,5453 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (success); + memmodel_consume_fix (failure); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_load (machine_mode *** 5493,5498 --- 5507,5514 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operand. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_fetch_op (machine_ *** 5553,5558 --- 5569,5576 model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_clear (tree exp) *** 5627,5632 --- 5645,5652 return const0_rtx; } + memmodel_consume_fix (model); + if (HAVE_atomic_clear) { emit_insn (gen_atomic_clear (mem, model)); *** expand_builtin_atomic_test_and_set (tree *** 5658,5664 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! 
return expand_atomic_test_and_set (target, mem, model); } --- 5678,5684 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! memmodel_consume_fix (model); return expand_atomic_test_and_set (target, mem, model); } *** static void *** 5797,5802 --- 5817,5823 expand_builtin_atomic_thread_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model); expand_mem_thread_fence (model); } *** static void *** 5808,5813 --- 5829,5835 expand_builtin_atomic_signal_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model);
[patch] libstdc++/64571 export fstream functions using new std::string
The existing tests for these functions are compile-only so didn't catch that I forgot to export these new symbols. I'll add a better test next week. Tested x86_64-linux, committed to trunk. commit d428e75af04d995451a917ef7c9caed6b8cee737 Author: Jonathan Wakely jwak...@redhat.com Date: Tue Jan 13 14:27:34 2015 + PR libstdc++/64571 * config/abi/pre/gnu.ver: Export fstream functions using new string. diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 7bb65e9..700da18 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1648,6 +1648,13 @@ GLIBCXX_3.4.21 { _ZStlsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_string*; _ZStrsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_string*; +# fstream functions taking ABI-tagged std::string +_ZNSt13basic_filebufI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; + # std::locale::name() returning new std::string _ZNKSt6locale4nameB5cxx11Ev;
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. Andrew
Re: [PATCH][ARM] FreeBSD ARM support, EABI, v3
On Thu, Jan 8, 2015 at 8:51 PM, Andreas Tobler andreast-l...@fgznet.ch wrote: On 08.01.15 17:27, Richard Earnshaw wrote: On 29/12/14 18:44, Andreas Tobler wrote: All, here is the third attempt to support ARM with FreeBSD. In the meantime we found another issue in the unwinder where I had to adapt some stuff. The unwind_phase2_forced function in libgcc calls a stop_fn function. This stop_fn is in FreeBSD's libthr implementation and is called thread_unwind_stop. This thread_unwind_stop is a generic function used on all FreeBSD archs. The issue is now that this thread_unwind_stop expects a double int for the exception_class, like on every other arch. For ARM EABI this exception_class is an array of char which is passed in one register as pointer vs. two registers for a double int. To solve this issue we defined the exception_class as double integer for FreeBSD. My apologies for the slow response, some other work and then holidays intervened. From my understanding of the ABI document the implementation is currently as mandated by the ABI. Also this isn't a part of the ABI that's available for the platform (here FreeBSD to manipulate and change as per it's wishes). ARM EHABI is special for software, making FreeBSD more special for ARM appears to be counter intuitive from my point of view. A number of exception unwinding libraries. for e.g. libobjc , libstdc++ all use this implementation of exception_class. Therefore this creates a divergence for the FreeBSD port which is different from everything else. I expect that a number of language run time support libraries that supported the ARM EHABI would be using such an implementation, therefore you need to fix every single implementation of this in every unwinder that supports the ARM EHABI which I expect to have been ported to in a number of libraries already. 
(I already see this in libobjc and libstdc++ in the GCC tree) I would rather fix the thread_unwind_stop implementation in libthr for ARM EHABI rather than make this change. This adaptation reduced the failure count in libstdc++ by about 40 fails. I build and test this port on a regular basis and I post the results to the usual place. Thanks for doing this. I'm really glad that FreeBSD is finally moving to EABI. regards Ramana ... Umm, sorry, just seen this update to the previous patch. The changes to the exception unwinding look a bit more involved. Could you separate that out into a separate patch, so that it's easier to see what you're changing? Ok, here the mentioned part as separate diff. The comments are above. The CL below :) Thank you very much! Andreas gcc: * ginclude/unwind-arm-common.h (_Uwind_Control_Block): Define exception_class as double integer for FreeBSD ARM. (_Unwind_Exception): Define _Unwind_Exception_Class as double integer for FreeBSD ARM. libstc++-v3: * libsupc++/unwind-cxx.h (__is_gxx_exception_class, __is_dependent_exception): Exclude FreeBSD ARM from the __ARM_EABI_UNWINDER__ ifdef.
Re: [PATCH] Fix PR64461, Incorrect code on coldfire targets
Jeff Law l...@redhat.com writes: For fun I've got an m68k bootstrap of the trunk running. I don't expect it to finish for at least a week or so, assuming it runs to completion. The last time I did that it took about 10 days (with all languages enabled, running in Aranym on a moderately fast host). Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 And now for something completely different.
Re: flatten expr.h (version 2)
On 13 January 2015 at 15:34, Richard Biener rguent...@suse.de wrote: On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: Hi, This is a revamped expr.h flattening flattening patch rebased on tree.h and tree-core.h flattening patch (r219402). It depends upon the following patch to get committed. https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html Changes: * Removed all includes except tree-core.h. Put includes required by expr.h in a comment. * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, genoutput.c. * Did not put includes in gcc-plugin.h since expr.h cannot be included by plugins (putting them broke building a file in c-family/ since expr.h is not allowed in front-ends) * Affects java front-end (expr.h is allowed in java front-end). Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: all,go,ada,jit Built on all targets in config-list.mk with languages: all, go. OK to commit ? diff --git a/gcc/expr.c b/gcc/expr.c index fc22862..824541e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see #include regs.h #include hard-reg-set.h #include except.h -#include input.h #include function.h #include insn-config.h #include insn-attr.h /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ +#include hashtab.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#include statistics.h +#include real.h +#include fixed-value.h #include expr.h Please move the comment to the proper place ah, my flattening tool doesn't look at comments. I will move the comment before expr.h include, thanks. diff --git a/gcc/expr.h b/gcc/expr.h index a7638b8..f1be8dc 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. 
If not see #ifndef GCC_EXPR_H #define GCC_EXPR_H -/* For inhibit_defer_pop */ +/* expr.h required includes */ +#if 0 #include hashtab.h #include hash-set.h #include vec.h @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. If not see #include hard-reg-set.h #include input.h #include function.h -/* For XEXP, GEN_INT, rtx_code */ #include rtl.h -/* For optimize_size */ #include flags.h -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ #include tree-core.h -/* For GET_MODE_BITSIZE, word_mode */ #include insn-config.h +#include alias.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#endif Err, please remove the #if 0 section I kept it because if something breaks later (hopefully not!), it will be easier to fix. I will remove it. + +#include tree-core.h Why? The original comment says -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ but all those are declared in tree.h. Which means the files including expr.h must already include tree.h. If that's not the reason we need to include tree-core.h from expr.c please add a comment explaining why. bt-load.c fails to compile because it includes expr.h but does not include tree.h I will place tree.h include in all files that include expr.h and rebuild. -/* Definitions from emit-rtl.c */ -#include emit-rtl.h - /* Return a memory reference like MEMREF, but with its mode widened to MODE and adjusted by OFFSET. */ extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); err - functions defined in emit-rtl.c should be declared in emit-rtl.h. Please fix that first. expr.h should _only_ contain prototypes for stuff defined in expr.c. oops, missed it :( Andrew did a good job with this, first cleaning up a header moving declarations to proper places and only after that flattening it. 
The rest of the patch looks good to me but expr.h isn't in a good shape after it. I will work on it and send patch with suggested changes by tomorrow. Thanks, Prathamesh Thanks, Richard.
Re: [Fortran, Patch] PR60334 - Segmentation fault on character pointer assignments
Hi Paul, thanks for the reviewed and the valued comments. Just for completeness I have attached the patch with the changes requested. Bootstraps and regtests ok on x86_64-linux-gnu. Regards, Andre On Mon, 12 Jan 2015 22:07:29 +0100 Paul Richard Thomas paul.richard.tho...@gmail.com wrote: Hi Andre, + if (INDIRECT_REF_P (parmse.string_length)) +/* In chains of functions/procedure calls the string_length already + is a pointer to the variable holding the length. Therefore + remove the deref on call. */ +parmse.string_length = TREE_OPERAND (parmse.string_length, 0); This is OK but I would use instead: + if (POINTER_TYPE_P (parmse.string_length)) +/* In chains of functions/procedure calls the string_length already + is a pointer to the variable holding the length. Therefore + remove the deref on call. */ +parmse.string_length = build_fold_indirect_ref (parmse.string_length); If you look in ~/gcc/fold-const.c:15751, you will see that TREE_OPERAND (parmse.string_length, 0) but that it is preceded by cleaning up of NOOPS and, in any case, its usage will preserve the standard API just in case the internals change :-) of course, using TREE_OPERAND (xxx, 0) in the various fortran class functions makes such an assumption ;-) Apart from that, the patch is fine. I'll have a session of doing some commits later this week and will do this patch at that time. Cheers Paul On 11 January 2015 at 16:21, Andre Vehreschild ve...@gmx.de wrote: Hi Paul, thanks for the review. I do not have commits rights. Unfortunately is the patch not ok. I figured today, that it needs an extension when function calls that return deferred char len arrays are nested. In this special case the string length would have been lost. The attached extended version fixes this issue. Sorry for the duplicate work. Bootstraps and regtests ok on x86_64-linux-gnu. Regards, Andre On Sun, 11 Jan 2015 16:11:10 +0100 Paul Richard Thomas paul.richard.tho...@gmail.com wrote: Dear Andre, This is OK for trunk. 
I have not been keeping track of whether or not you have commit rights yet. If not, I will get to it sometime this week. Thanks for the patch. Paul On 10 January 2015 at 15:59, Andre Vehreschild ve...@gmx.de wrote: Hi all, attached patch fixes the bug reported in pr 60334. The issue here was that the function's result being (a pointer to) a deferred length char array. The string length for the result value was wrapped in a local variable, whose value was never written back to the string length of the result. This lead the calling routine to take the length of the result to be random leading to a crash. This patch addresses the issue by preventing the instantiation of the local var and instead using a reference to the parameter. This not only saves one value on the stack, but also because for small functions the compiler will hold all parameters in registers for a significant level of optimization, all the overhead of memory access (I hope :-). Bootstraps and regtests ok on x86_64-linux-gnu. - Andre -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de -- Andre Vehreschild * Kreuzherrenstr. 
8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de pr60334_3.clog Description: Binary data diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 1e74125..86873f7 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1333,12 +1333,30 @@ gfc_get_symbol_decl (gfc_symbol * sym) (sym-ts.u.cl-passed_length == sym-ts.u.cl-backend_decl)) sym-ts.u.cl-backend_decl = NULL_TREE; - if (sym-ts.deferred fun_or_res - sym-ts.u.cl-passed_length == NULL - sym-ts.u.cl-backend_decl) + if (sym-ts.deferred byref) { - sym-ts.u.cl-passed_length = sym-ts.u.cl-backend_decl; - sym-ts.u.cl-backend_decl = NULL_TREE; + /* The string length of a deferred char array is stored in the + parameter at sym-ts.u.cl-backend_decl as a reference and + marked as a result. Exempt this variable from generating a + temporary for it. */ + if (sym-attr.result) + { + /* We need to insert a indirect ref for param decls. */ + if (sym-ts.u.cl-backend_decl + TREE_CODE (sym-ts.u.cl-backend_decl) == PARM_DECL) + sym-ts.u.cl-backend_decl = + build_fold_indirect_ref (sym-ts.u.cl-backend_decl); + } + /* For all other parameters make sure, that they are copied so + that the value and any modifications are local to the routine + by generating a temporary variable. */ + else if (sym-attr.function + sym-ts.u.cl-passed_length == NULL +
Re: shift/extract SHIFT_COUNT_TRUNCATED combine bug
On Mon, Jan 12, 2015 at 11:12 PM, Jeff Law l...@redhat.com wrote: On 04/08/14 14:07, Mike Stump wrote: Something broke in the compiler to cause combine to incorrectly optimize: (insn 12 11 13 3 (set (reg:SI 604 [ D.6102 ]) (lshiftrt:SI (subreg/s/u:SI (reg/v:DI 601 [ x ]) 0) (reg:SI 602 [ D.6103 ]))) t.c:47 4436 {lshrsi3} (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) (nil))) (insn 13 12 14 3 (set (reg:SI 605) (and:SI (reg:SI 604 [ D.6102 ]) (const_int 1 [0x1]))) t.c:47 3658 {andsi3} (expr_list:REG_DEAD (reg:SI 604 [ D.6102 ]) (nil))) (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) (zero_extend:DI (reg:SI 605))) t.c:47 4616 {zero_extendsidi2} (expr_list:REG_DEAD (reg:SI 605) (nil))) into: (insn 11 10 12 3 (set (reg:SI 602 [ D.6103 ]) (not:SI (subreg:SI (reg:DI 595 [ D.6102 ]) 0))) t.c:47 3732 {one_cmplsi2} (expr_list:REG_DEAD (reg:DI 595 [ D.6102 ]) (nil))) (note 12 11 13 3 NOTE_INSN_DELETED) (note 13 12 14 3 NOTE_INSN_DELETED) (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) (zero_extract:DI (reg/v:DI 601 [ x ]) (const_int 1 [0x1]) (reg:SI 602 [ D.6103 ]))) t.c:47 4668 {c2_extzvdi} (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) (nil))) This shows up in: FAIL: gcc.c-torture/execute/builtin-bitops-1.c execution, -Og -g for me. diff --git a/gcc/combine.c b/gcc/combine.c index 708691f..c1f50ff 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7245,6 +7245,18 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos, extraction_mode = insn.field_mode; } + /* On a SHIFT_COUNT_TRUNCATED machine, we can't promote the mode of + the extract to a larger size on a variable extract, as previously + the position might have been optimized to change a bit of the + index of the starting bit that would have been ignored before, + but, with a larger mode, will then not be. If we wanted to do + this, we'd have to mask out those bits or prove that those bits + are 0. 
*/ + if (SHIFT_COUNT_TRUNCATED + pos_rtx + GET_MODE_BITSIZE (extraction_mode) GET_MODE_BITSIZE (mode)) +extraction_mode = mode; + /* Never narrow an object, since that might not be safe. */ if (mode != VOIDmode is sufficient to never widen variable extracts on SHIFT_COUNT_TRUNCATED machines. So, the question is, how did people expect this to work? I didn’t spot what changed recently to cause the bad code-gen. The optimization of sub into not is ok, despite how funny it looks, because is feeds into extract which we know by SHIFT_COUNT_TRUNCATED is safe. Is the patch a reasonable way to fix this? On a SHIFT_COUNT_TRUNCATED target, I don't think it's ever OK to widen a shift, variable or constant. In the case of a variable shift, we could easily have eliminated the masking code before or during combine. For a constant shift amount we could have adjusted the constant (see SHIFT_COUNT_TRUNCATED in cse.c) I think it's just an oversight and it has simply never bit us before. IMHO SHIFT_COUNT_TRUNCATED should be removed and instead backends should provide shift patterns with a (and:QI ...) for the shift amount which simply will omit that operation if suitable. Richard. jeff
Re: [patch, arm] Minor optimization on thumb2 tail call
On 19/11/14 02:43, Joey Ye wrote: Current thumb2 -Os generates suboptimal code for following tail call case: int f4(int b, int a, int c, int d); int g(int a, int b, int c, int d) { return f4(b, a, c, d); } arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-m3 test.c push {r4, lr} mov r4, r1 mov r1, r0 mov r0, r4 pop {r4, lr} b f4 There are two issues: The first one is that saving/restoring lr is not necessary, as there is no return via pop pc. The second one is that even if we managed to avoid lr push/pop, ldmia.w sp!, {r4} is still emitted as there is a missing pattern for pop single and code size is not optimal. This patch fixes these two issues and introduces a shared test case. CSiBE thumb2 -Os shows cross board code size reduction, except for one case with 4 bytes regression. The case is like: void f () { if () ... else if () ... else g(); } There are N=2 non-sibcall returns and S=1 sibcall return. Originally the non-sibcall returns are just pop {r4, r5, pc}, now they become b.n .Lreturn .Lreturn: pop {r4, r5} bx lr The one byte save from sibcall return does not win the non-sibcall return regressions back. In general scenario, number of N non-sibcall returns use b.n branching to merged tail, number of S sibcalls save 2 bytes by avoid poping lr. It results in 4-2*S bytes regression. In the worst scenario, each non-sibcall return has to use b.w branching to merged tail, resulting in (N-S)*2 bytes regression. The worst scenario is rare, according to CSiBE. The general regression scenario can only regress 2 bytes at most. So I would not introduce additional complexity to handle the regression case. Make check cortex-m3: pass thumb2 bootstrap (O2/Os): pass * config/arm/arm.c (arm_compute_save_reg_mask): Do not save lr in case of tail call. * config/arm/thumb2.md (*thumb2_pop_single): New pattern. * gcc.target/arm/thumb2-pop-single.c: New test. 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4f04707..20d0b9e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19190,6 +19190,7 @@ arm_compute_save_reg_mask (void) || (save_reg_mask optimize_size ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + !crtl-tail_call_emit !crtl-calls_eh_return)) save_reg_mask |= 1 LR_REGNUM; diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 64acfea..29cfb17 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -267,6 +267,17 @@ (set_attr type multiple)] ) +;; Pop a single register as its size is preferred over a post-incremental load +(define_insn *thumb2_pop_single + [(set (match_operand:SI 0 low_register_operand =r) +(mem:SI (post_inc:SI (reg:SI SP_REGNUM] + TARGET_THUMB2 (reload_in_progress || reload_completed) + pop\t{%0} + [(set_attr type load1) + (set_attr length 2) + (set_attr predicable yes)] +) + ;; We have two alternatives here for memory loads (and similarly for stores) ;; to reflect the fact that the permissible constant pool ranges differ ;; between ldr instructions taking low regs and ldr instructions taking high This is OK thanks. Please CC me on ARM specific patches, this one somehow seems to have missed my filters. Ramana
Re: flatten expr.h (version 2)
On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: Hi, This is a revamped expr.h flattening flattening patch rebased on tree.h and tree-core.h flattening patch (r219402). It depends upon the following patch to get committed. https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html Changes: * Removed all includes except tree-core.h. Put includes required by expr.h in a comment. * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, genoutput.c. * Did not put includes in gcc-plugin.h since expr.h cannot be included by plugins (putting them broke building a file in c-family/ since expr.h is not allowed in front-ends) * Affects java front-end (expr.h is allowed in java front-end). Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: all,go,ada,jit Built on all targets in config-list.mk with languages: all, go. OK to commit ? diff --git a/gcc/expr.c b/gcc/expr.c index fc22862..824541e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see #include regs.h #include hard-reg-set.h #include except.h -#include input.h #include function.h #include insn-config.h #include insn-attr.h /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ +#include hashtab.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#include statistics.h +#include real.h +#include fixed-value.h #include expr.h Please move the comment to the proper place diff --git a/gcc/expr.h b/gcc/expr.h index a7638b8..f1be8dc 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_EXPR_H #define GCC_EXPR_H -/* For inhibit_defer_pop */ +/* expr.h required includes */ +#if 0 #include hashtab.h #include hash-set.h #include vec.h @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. 
If not see #include hard-reg-set.h #include input.h #include function.h -/* For XEXP, GEN_INT, rtx_code */ #include rtl.h -/* For optimize_size */ #include flags.h -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ #include tree-core.h -/* For GET_MODE_BITSIZE, word_mode */ #include insn-config.h +#include alias.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#endif Err, please remove the #if 0 section + +#include tree-core.h Why? The original comment says -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ but all those are declared in tree.h. Which means the files including expr.h must already include tree.h. If that's not the reason we need to include tree-core.h from expr.c please add a comment explaining why. -/* Definitions from emit-rtl.c */ -#include emit-rtl.h - /* Return a memory reference like MEMREF, but with its mode widened to MODE and adjusted by OFFSET. */ extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); err - functions defined in emit-rtl.c should be declared in emit-rtl.h. Please fix that first. expr.h should _only_ contain prototypes for stuff defined in expr.c. Andrew did a good job with this, first cleaning up a header moving declarations to proper places and only after that flattening it. The rest of the patch looks good to me but expr.h isn't in a good shape after it. Thanks, Richard.
Re: [Patch, AArch64, Testsuite] Check for expected MOVI vectorization.
On 9 January 2015 at 16:31, Tejas Belagod tejas.bela...@arm.com wrote: gcc/testsuite: * gcc.target/aarch64/vect-movi.c: Check for vectorization for 64-bit and 128-bit. OK /Marcus
Re: [gomp4] Replace enum omp_clause_map_kind with enum gomp_map_kind (was: Including a file from include/ in gcc/*.h)
Hi! On Mon, 12 Jan 2015 17:39:16 +0100, Jakub Jelinek ja...@redhat.com wrote: On Mon, Jan 12, 2015 at 05:32:14PM +0100, Thomas Schwinge wrote: I have now committed the patch to gomp-4_0-branch in the following form. The issues raised above remain to be resolved. (I'll try to address those later on.) In spirit against the tree.h header flattening, I had to keep the #include include/gomp-constants.h in gcc/tree-core.h, because otherwise I'd have to add it to a ton of *.c files, just for the enum gomp_map_kind definition. I found that in the C++ dialect used by GCC, it is not possible to declare an enum without giving the list of enumerators. N2764 (from 2008) resolved this by adding appropriate syntax for declaring enums, however: warning: scoped enums only available with -std=c++11 or -std=gnu++11. If it were possible to use this, we could add to gcc/tree-core.h: enum gomp_map_kind : char; ... (or similar), and this way decouple the declaration (gcc/tree-core.h) From the actual population of it (include/gomp-constants.h). Alternatively, in gcc/tree-core.h:struct tree_omp_clause, we could switch the map_kind member from enum gomp_map_kind to a char -- but that would defeat the usage of an enum (easy pretty-printing of its enumerators in GDB, and so on.). Or just don't do this and duplicate the constants and just assert somewhere (in omp-low.c) at compile time that all the values match. Either using char and casting the value only in the OMP_* macros or duplicating the values sound preferrable over including include/gomp-constants.h from tree-core.h. Indeed I've found precedent in gcc/tree.h: there already are a few *_SET_* functions, also used for casting to/from enum types. Committed to gomp-4_0-branch in r219524: commit 7dbb7ec6c08d604926fca30e105d2b6411cf73cb Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4 Date: Tue Jan 13 10:50:01 2015 + Avoid inclusion of gomp-constants.h in gcc/tree-core.h. 
N2764 (from 2008) added syntax for declaring enums, however: warning: scoped enums only available with -std=c++11 or -std=gnu++11: in the C++ dialect currently used by GCC, it is not possible to declare an enum without giving the full list of enumerators. If it were possible to use this, we could add to gcc/tree-core.h: enum gomp_map_kind : unsigned char; ..., and keep using enum gomp_map_kind for gcc/tree-core.h's struct tree_omp_clause's map_kind member, and this way decouple the declaration (gcc/tree-core.h) from the actual population of it (include/gomp-constants.h). Until switching GCC to C++11, we'll have to do as follows: gcc/ * tree-core.h: Don't include gomp-constants.h. (struct tree_omp_clause): Change type of map_kind member from enum gomp_map_kind to unsigned char. * tree.h (OMP_CLAUSE_MAP_KIND): Cast it to enum gomp_map_kind. (OMP_CLAUSE_SET_MAP_KIND): New macro. * gimplify.c (gimplify_adjust_omp_clauses_1) (gimplify_adjust_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. * omp-low.c (oacc_initialize_reduction_data): Likewise. * tree-nested.c (convert_nonlocal_reference_stmt) (convert_local_reference_stmt, convert_gimple_call): Likewise. * tree-streamer-in.c (unpack_ts_omp_clause_value_fields): Likewise. gcc/c/ * c-parser.c (c_parser_oacc_data_clause) (c_parser_oacc_data_clause_deviceptr, c_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * c-typeck.c (handle_omp_array_sections): Likewise. gcc/cp/ * parser.c (cp_parser_oacc_data_clause) (cp_parser_oacc_data_clause_deviceptr, cp_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * semantics.c (handle_omp_array_sections): Likewise. gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause, gfc_trans_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. gcc/ * lto-streamer-out.c: Include gomp-constants.h. * tree-streamer-in.c: Likewise. * tree-streamer-out.c: Likewise. gcc/lto/ * lto.c: Include gomp-constants.h. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219524 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 19 +++ gcc/c/ChangeLog.gomp | 7 +++ gcc/c/c-parser.c | 6 +++--- gcc/c/c-typeck.c | 2 +- gcc/cp/ChangeLog.gomp | 7 +++ gcc/cp/parser.c| 6 +++--- gcc/cp/semantics.c | 4 ++-- gcc/fortran/trans-openmp.c | 46 +++--- gcc/gimplify.c | 11 ++- gcc/lto-streamer-out.c | 1 + gcc/lto/ChangeLog.gomp | 4 gcc/lto/lto.c | 1 + gcc/omp-low.c | 2 +- gcc/tree-core.h| 5 ++--- gcc/tree-nested.c |
Re: [PATCH] Fix PR64436: broken logic to process bitwise ORs in bswap pass
On Mon, 12 Jan 2015, Thomas Preud'homme wrote: Hi all, To identify if a set of loads, shift, cast, mask (bitwise and) and bitwise OR is equivalent to a load or byteswap, the bswap pass assigns a number to each byte loaded according to its significance (1 for lsb, 2 for next least significant byte, etc.) and forms a symbolic number such as 0x04030201 for a 32bit load. When processing a bitwise OR of two such symbolic numbers, it is necessary to consider the lowest and highest addresses where a byte was loaded to renumber each byte accordingly. For instance if the two numbers are 0x04030201 and they were loaded from consecutive words in memory the result would be 0x0807060504030201 but if they overlap fully the result would be 0x04030201. Currently the computation of the byte with highest address is broken: it takes the byte with highest address of the symbolic number that starts last. That is, if one number represents an 8bit load at address 0x14 and another number represents a 32bit load at address 0x12 it will compute the end as 0x14 instead of 0x15. This error affects the computation of the size of the load for all targets and the computation of the symbolic number that results from the bitwise OR for big endian targets. This is what causes PR64436 due to a change in the gimple generated for that testcase. ChangeLog entry is as follows: Ok. Thanks, Richard. gcc/ChangeLog 2014-12-30 Thomas Preud'homme thomas.preudho...@arm.com PR tree-optimization/64436 * tree-ssa-math-opts.c (find_bswap_or_nop_1): Move code performing the merge of two symbolic numbers for a bitwise OR to ... (perform_symbolic_merge): This. Also fix computation of the range and end of the symbolic number corresponding to the result of a bitwise OR. 
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 1ed2838..286183a 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1816,6 +1816,123 @@ find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n) return true; } +/* Compute the symbolic number N representing the result of a bitwise OR on 2 + symbolic number N1 and N2 whose source statements are respectively + SOURCE_STMT1 and SOURCE_STMT2. */ + +static gimple +perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1, + gimple source_stmt2, struct symbolic_number *n2, + struct symbolic_number *n) +{ + int i, size; + uint64_t mask; + gimple source_stmt; + struct symbolic_number *n_start; + + /* Sources are different, cancel bswap if they are not memory location with + the same base (array, structure, ...). */ + if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2)) +{ + int64_t inc; + HOST_WIDE_INT start_sub, end_sub, end1, end2, end; + struct symbolic_number *toinc_n_ptr, *n_end; + + if (!n1-base_addr || !n2-base_addr + || !operand_equal_p (n1-base_addr, n2-base_addr, 0)) + return NULL; + + if (!n1-offset != !n2-offset || + (n1-offset !operand_equal_p (n1-offset, n2-offset, 0))) + return NULL; + + if (n1-bytepos n2-bytepos) + { + n_start = n1; + start_sub = n2-bytepos - n1-bytepos; + source_stmt = source_stmt1; + } + else + { + n_start = n2; + start_sub = n1-bytepos - n2-bytepos; + source_stmt = source_stmt2; + } + + /* Find the highest address at which a load is performed and + compute related info. */ + end1 = n1-bytepos + (n1-range - 1); + end2 = n2-bytepos + (n2-range - 1); + if (end1 end2) + { + end = end2; + end_sub = end2 - end1; + } + else + { + end = end1; + end_sub = end1 - end2; + } + n_end = (end2 end1) ? n2 : n1; + + /* Find symbolic number whose lsb is the most significant. */ + if (BYTES_BIG_ENDIAN) + toinc_n_ptr = (n_end == n1) ? n2 : n1; + else + toinc_n_ptr = (n_start == n1) ? 
n2 : n1; + + n-range = end - n_start-bytepos + 1; + + /* Check that the range of memory covered can be represented by + a symbolic number. */ + if (n-range 64 / BITS_PER_MARKER) + return NULL; + + /* Reinterpret byte marks in symbolic number holding the value of + bigger weight according to target endianness. */ + inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; + size = TYPE_PRECISION (n1-type) / BITS_PER_UNIT; + for (i = 0; i size; i++, inc = BITS_PER_MARKER) + { + unsigned marker = + (toinc_n_ptr-n (i * BITS_PER_MARKER)) MARKER_MASK; + if (marker marker != MARKER_BYTE_UNKNOWN) + toinc_n_ptr-n += inc; + } +} + else +{ + n-range = n1-range; + n_start = n1; + source_stmt = source_stmt1; +} + + if
[AArch64] Allow stack pointer as first input to a subtraction
Several sub-based patterns allowed the stack pointer to be the destination but not the first source. This looked like an oversight; in all the patterns changed here (but not for example in *sub_mul_imm_mode), the instruction allows the stack pointer to appear in both positions. Tested on aarch64-linux-gnu. OK to install? Thanks, Richard gcc/ * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) (*sub_optabALLX:mode_GPI:mode, *sub_optabSHORT:mode_si_uxtw) (*sub_optabALLX:mode_shft_GPI:mode) (*sub_optabSHORT:mode_shft_si_uxtw, *sub_optabmode_multp2) (*sub_optabsi_multp2_uxtw, *sub_uxtmode_multp2) (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. gcc/testsuite/ * gcc.target/aarch64/subsp.c: New test. Index: gcc/config/aarch64/aarch64.md === --- gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.901649982 + +++ gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.897650031 + @@ -1889,8 +1889,8 @@ (define_insn *add_uxtsi_multp2_uxtw (define_insn subsi3 [(set (match_operand:SI 0 register_operand =rk) - (minus:SI (match_operand:SI 1 register_operand r) - (match_operand:SI 2 register_operand r)))] + (minus:SI (match_operand:SI 1 register_operand rk) + (match_operand:SI 2 register_operand r)))] sub\\t%w0, %w1, %w2 [(set_attr type alu_sreg)] @@ -1900,7 +1900,7 @@ (define_insn subsi3 (define_insn *subsi3_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (match_operand:SI 2 register_operand r] sub\\t%w0, %w1, %w2 @@ -1909,8 +1909,8 @@ (define_insn *subsi3_uxtw (define_insn subdi3 [(set (match_operand:DI 0 register_operand =rk,w) - (minus:DI (match_operand:DI 1 register_operand r,w) - (match_operand:DI 2 register_operand r,w)))] + (minus:DI (match_operand:DI 1 register_operand rk,w) + (match_operand:DI 2 register_operand r,w)))] @ sub\\t%x0, %x1, %x2 @@ -2013,7 +2013,7 @@ (define_insn *sub_mul_imm_si_uxtw (define_insn *sub_optabALLX:mode_GPI:mode [(set 
(match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 1 register_operand r) + (minus:GPI (match_operand:GPI 1 register_operand rk) (ANY_EXTEND:GPI (match_operand:ALLX 2 register_operand r] @@ -2025,7 +2025,7 @@ (define_insn *sub_optabALLX:mode_G (define_insn *sub_optabSHORT:mode_si_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (ANY_EXTEND:SI (match_operand:SHORT 2 register_operand r)] @@ -2035,7 +2035,7 @@ (define_insn *sub_optabSHORT:mode_s (define_insn *sub_optabALLX:mode_shft_GPI:mode [(set (match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 1 register_operand r) + (minus:GPI (match_operand:GPI 1 register_operand rk) (ashift:GPI (ANY_EXTEND:GPI (match_operand:ALLX 2 register_operand r)) (match_operand 3 aarch64_imm3 Ui3] @@ -2048,7 +2048,7 @@ (define_insn *sub_optabALLX:mode_sh (define_insn *sub_optabSHORT:mode_shft_si_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (ashift:SI (ANY_EXTEND:SI (match_operand:SHORT 2 register_operand r)) (match_operand 3 aarch64_imm3 Ui3)] @@ -2059,7 +2059,7 @@ (define_insn *sub_optabSHORT:mode_s (define_insn *sub_optabmode_multp2 [(set (match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 4 register_operand r) + (minus:GPI (match_operand:GPI 4 register_operand rk) (ANY_EXTRACT:GPI (mult:GPI (match_operand:GPI 1 register_operand r) (match_operand 2 aarch64_pwr_imm3 Up3)) @@ -2074,7 +2074,7 @@ (define_insn *sub_optabmode_multp2 (define_insn *sub_optabsi_multp2_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 4 register_operand r) + (minus:SI (match_operand:SI 4 register_operand rk) (ANY_EXTRACT:SI (mult:SI (match_operand:SI 1 register_operand r) (match_operand 2 
aarch64_pwr_imm3 Up3)) @@ -2113,7 +2113,7 @@ (define_insn *subsi3_carryin_uxtw (define_insn *sub_uxtmode_multp2 [(set
Re: [AARCH64][PR63424][4.9]Backport Fix PR63424 by adding sumaxminv2di3 pattern
On 7 January 2015 at 14:01, Renlin Li renlin...@arm.com wrote: Is it Okay for branch 4.9? gcc/ChangeLog: 2014-11-19 Renlin Li renlin...@arm.com PR target/63424 * config/aarch64/aarch64-simd.md (sumaxminv2di3): New. gcc/testsuite/ChangeLog: 2014-11-19 Renlin Li renlin...@arm.com PR target/63424 * gcc.target/aarch64/pr63424.c: New Test. OK /Marcus
[testsuite] PATCH: Check if -pg available
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? Thanks. H.J. --- gcc/testsuite/lib/target-supports.exp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ac9646..7c09399 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { set profiling_available_saved 0 } else { set profiling_available_saved 1 + if { [check_no_compiler_messages_nocache profiling executable { + int main() { return 0; } } -pg] } { + set profiling_available_saved 1 +} else { + set profiling_available_saved 0 + } } } -- 1.9.3
Re: [Fortran, Patch] Cosmetics
Hi, is this patch committed now? I don't have the rights to do so myself. - Andre On Sun, 28 Dec 2014 17:17:50 +0100 FX fxcoud...@gmail.com wrote: 2014-12-28 Andre Vehreschild ve...@gmx.de * trans-decl.c (gfc_finish_var_decl): Fixed displaced comment. * trans-stmt.c (gfc_trans_allocate): Fixed indentation. OK to commit. Thanks! FX -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de
Re: [PATCH] Fix PR64415
On Tue, 13 Jan 2015, Jakub Jelinek wrote: On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. I actually think it makes sense to disable -fvar-tracking-assignments just for specific function, e.g. when it is known to be too expensive on some large function you don't care about debug info quality too much, while you still don't want to disable it on the whole TU level, because you have other functions (e.g. small ones) you still want to be able to debug often with good coverage. So if this is fixable in the inliner and/or LTO in-streamer that would be my preference. The following seems to work (for the testcase). Testing coverage of this mode will of course be bad. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * tree-inline.c (insert_debug_decl_map): Check destination function MAY_HAVE_DEBUG_STMTS. (insert_init_debug_bind): Likewise. (insert_init_stmt): Remove redundant check. (remap_gimple_stmt): Drop debug stmts if the destination function has var-tracking assignments disabled. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. 
Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options -g } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +} Index: gcc/tree-inline.c === --- gcc/tree-inline.c (revision 219520) +++ gcc/tree-inline.c (working copy) @@ -192,7 +192,7 @@ insert_debug_decl_map (copy_body_data *i if (!gimple_in_ssa_p (id-src_cfun)) return; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) return; if (!target_for_debug_bind (key)) @@ -1348,6 +1348,10 @@ remap_gimple_stmt (gimple stmt, copy_bod bool skip_first = false; gimple_seq stmts = NULL; + if (is_gimple_debug (stmt) + !opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) +return stmts; + /* Begin by recognizing trees that we'll completely rewrite for the inlining context. Our output for these trees is completely different from out input (e.g. 
RETURN_EXPR is deleted, and morphs @@ -3007,7 +3011,7 @@ insert_init_debug_bind (copy_body_data * if (!gimple_in_ssa_p (id-src_cfun)) return NULL; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) return NULL; tracked_var = target_for_debug_bind (var); @@ -3063,7 +3067,7 @@ insert_init_stmt (copy_body_data *id, ba gsi_insert_after (si, init_stmt, GSI_NEW_STMT); gimple_regimplify_operands (init_stmt, si); - if (!is_gimple_debug (init_stmt) MAY_HAVE_DEBUG_STMTS) + if (!is_gimple_debug (init_stmt)) { tree def = gimple_assign_lhs (init_stmt); insert_init_debug_bind (id, bb, def, def, init_stmt);
Re: [AArch64] Allow stack pointer as first input to a subtraction
On 13 January 2015 at 10:47, Richard Sandiford richard.sandif...@arm.com wrote: Several sub-based patterns allowed the stack pointer to be the destination but not the first source. This looked like an oversight; in all the patterns changed here (but not for example in *sub_mul_imm_mode), the instruction allows the stack pointer to appear in both positions. Tested on aarch64-linux-gnu. OK to install? Thanks, Richard gcc/ * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) (*sub_optabALLX:mode_GPI:mode, *sub_optabSHORT:mode_si_uxtw) (*sub_optabALLX:mode_shft_GPI:mode) (*sub_optabSHORT:mode_shft_si_uxtw, *sub_optabmode_multp2) (*sub_optabsi_multp2_uxtw, *sub_uxtmode_multp2) (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. gcc/testsuite/ * gcc.target/aarch64/subsp.c: New test. OK /Marcus
Re: [PATCH/AARCH64] Correctly handle stores of zero in fusion_load_store
On 13 January 2015 at 04:48, Andrew Pinski pins...@gmail.com wrote: ChangeLog: * config/aarch64/aarch64.c (fusion_load_store): Check dest mode instead of src mode. * gcc.target/aarch64/store-pair-1.c: New testcase. OK, thanks /Marcus
Re: [PATCH]: New configure options that make the compiler use -fPIE and -pie as default option
On Mon, Jan 12, 2015 at 11:50:41PM +, Joseph Myers wrote: On Mon, 12 Jan 2015, H.J. Lu wrote: +if test x$enable_default_pie = xyes; then + AC_MSG_CHECKING(if $target supports default PIE) + enable_default_pie=no + case $target in +i?86*-*-linux* | x86_64*-*-linux*) + saved_LDFLAGS=$LDFLAGS + saved_CFLAGS=$CFLAGS + CFLAGS=$CFLAGS -fPIE + LDFLAGS=$LDFLAGS -fPIE -pie + AC_TRY_LINK(,,[enable_default_pie=yes],) + LDFLAGS=$saved_LDFLAGS + CFLAGS=$saved_CFLAGS + ;; +*) + ;; +esac There should not be any such hardcoding of targets here without concrete evidence that the targets for which this sets enable_default_pie=no really cannot support PIE. In particular, there is no reason at all for this to be architecture-specific; all GNU/Linux architectures should support PIE. I believe AC_TRY_LINK here will test for the host, whereas what you want to know is what's supported for the target (but it's not possible to run link tests for the target at this point; the compiler for the target hasn't even been built). So: just presume that if the user passes --enable-default-pie then they know what they are doing, and don't try to override their choice. diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index c9e3bf1..89fc305 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -1583,6 +1583,10 @@ not be built. Specify that the run-time libraries for stack smashing protection should not be built. +@item --enable-default-pie +Turn on @option{-fPIE} and @option{-pie} by default if supported. +Currently supported targets are i?86-*-linux* and x86-64-*-linux*. The if supported and target list can then be removed here. Here is the updated patch. To support --enable-default-pie, each target must update STARTFILE_SPEC to support PIE_SPEC and NO_PIE_SPEC. I can provide STARTFILE_SPEC patch if needed. Thanks. H.J. --- gcc/ 2015-01-12 Magnus Granberg zo...@gentoo.org H.J. Lu hongjiu...@intel.com * Makefile.in (COMPILER): Add @NO_PIE_CFLAGS@. (LINKER): Add @NO_PIE_FLAG@. 
(libgcc.mvars): Set NO_PIE_CFLAGS to -fno-PIE for --enable-default-pie. * common.opt (fPIE): Initialize to -1. (fpie): Likewise. (static): Add RejectNegative Negative(shared). (no-pie): New option. (pie): Replace Negative(shared) with Negative(no-pie). * configure.ac: Add --enable-default-pie. (NO_PIE_CFLAGS): New. Check if -fno-PIE works. AC_SUBST. (NO_PIE_FLAG): New. Check if -no-pie works. AC_SUBST. * defaults.h (DEFAULT_FLAG_PIE): New. Default PIE to -fPIE. * gcc.c (NO_PIE_SPEC): New. (PIE_SPEC): Likewise. (LD_PIE_SPEC): Likewise. (LINK_PIE_SPEC): Handle -no-pie. Use PIE_SPEC and LD_PIE_SPEC. * opts.c (DEFAULT_FLAG_PIE): New. Set to 0 if ENABLE_DEFAULT_PIE is undefined. (finish_options): Update opts-x_flag_pie if it is -1. * config/gnu-user.h (FVTABLE_VERIFY_SPEC): New. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. Use NO_PIE_SPEC and NO_PIE_SPEC if ENABLE_DEFAULT_PIE is defined. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. * doc/install.texi: Document --enable-default-pie. * doc/invoke.texi: Document -no-pie. * config.in: Regenerated. * configure: Likewise. gcc/ada/ 2015-01-12 H.J. Lu hongjiu...@intel.com * gcc-interface/Makefile.in (TOOLS_LIBS): Add @NO_PIE_FLAG@. libgcc/ 2015-01-12 H.J. Lu hongjiu...@intel.com * Makefile.in (CRTSTUFF_CFLAGS): Add $(NO_PIE_CFLAGS). diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5f9261f..180751f 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -252,6 +252,12 @@ LINKER = $(CC) LINKER_FLAGS = $(CFLAGS) endif +# We don't want to compile the compiler with -fPIE, it make PCH fail. +COMPILER += @NO_PIE_CFLAGS@ + +# Link with -no-pie since we compile the compiler with -fno-PIE. +LINKER += @NO_PIE_FLAG@ + # Like LINKER, but use a mutex for serializing front end links. 
ifeq (@DO_LINK_MUTEX@,true) LLINKER = $(SHELL) $(srcdir)/lock-and-run.sh linkfe.lck $(LINKER) @@ -1854,6 +1860,12 @@ libgcc.mvars: config.status Makefile specs xgcc$(exeext) echo GCC_CFLAGS = '$(GCC_CFLAGS)' tmp-libgcc.mvars echo INHIBIT_LIBC_CFLAGS = '$(INHIBIT_LIBC_CFLAGS)' tmp-libgcc.mvars echo TARGET_SYSTEM_ROOT = '$(TARGET_SYSTEM_ROOT)' tmp-libgcc.mvars + if test @enable_default_pie@ = yes; then \ + NO_PIE_CFLAGS=-fno-PIE; \ + else \ + NO_PIE_CFLAGS=; \ + fi; \ + echo NO_PIE_CFLAGS = $$NO_PIE_CFLAGS tmp-libgcc.mvars mv tmp-libgcc.mvars libgcc.mvars diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in index 870cfab..a446d48 100644 --- a/gcc/ada/gcc-interface/Makefile.in
Re: [testsuite] PATCH: Check if -pg available
On Tue, Jan 13, 2015 at 04:54:32AM -0800, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? Thanks. H.J. --- gcc/testsuite/lib/target-supports.exp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ac9646..7c09399 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { set profiling_available_saved 0 } else { set profiling_available_saved 1 + if { [check_no_compiler_messages_nocache profiling executable { + int main() { return 0; } } -pg] } { + set profiling_available_saved 1 + } else { + set profiling_available_saved 0 + } } } Here is the ChangeLog entry. 2015-01-13 H.J. Lu hongjiu...@intel.com * lib/target-supports.exp (check_profiling_available): Check if -pg links. H.J.
[PATCH] Fix PR64373
The following patch guards LTO against PARM_DECLs without DECL_CONTEXT. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-02-13 Richard Biener rguent...@suse.de PR lto/64373 * lto-streamer-out.c (tree_is_indexable): Guard for NULL DECL_CONTEXT. * gcc.dg/lto/pr64373_0.c: New testcase. Index: gcc/lto-streamer-out.c === --- gcc/lto-streamer-out.c (revision 219520) +++ gcc/lto-streamer-out.c (working copy) @@ -154,7 +154,8 @@ tree_is_indexable (tree t) /* Parameters and return values of functions of variably modified types must go to global stream, because they may be used in the type definition. */ - if (TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + if ((TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + DECL_CONTEXT (t)) return variably_modified_type_p (TREE_TYPE (DECL_CONTEXT (t)), NULL_TREE); /* IMPORTED_DECL is put into BLOCK and thus it never can be shared. */ else if (TREE_CODE (t) == IMPORTED_DECL) Index: gcc/testsuite/gcc.dg/lto/pr64373_0.c === --- gcc/testsuite/gcc.dg/lto/pr64373_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64373_0.c(working copy) @@ -0,0 +1,10 @@ +/* { dg-lto-do assemble } */ + +extern void b(int L, float (*data)[L]); + +void a(void) +{ + float* p = 0; + int i = 0; + b(10, (float (*)[10])(p + i)); +}
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. I actually think it makes sense to disable -fvar-tracking-assignments just for specific function, e.g. when it is known to be too expensive on some large function you don't care about debug info quality too much, while you still don't want to disable it on the whole TU level, because you have other functions (e.g. small ones) you still want to be able to debug often with good coverage. So if this is fixable in the inliner and/or LTO in-streamer that would be my preference. Jakub
[PATCH] Fix PR64406
When a optimization pass in the loop pipeline moves stmts between loops or removes loops we have to reset the SCEV cache to not have stale CHREC_LOOPs. This patch does it for loop distribution for which I have a testcase. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64406 * tree-loop-distibution.c (pass_loop_distribution::execute): Reset the SCEV hashtable if we distributed anything. * gcc.dg/pr64406.c: New testcase. Index: gcc/tree-loop-distribution.c === --- gcc/tree-loop-distribution.c(revision 219520) +++ gcc/tree-loop-distribution.c(working copy) @@ -1838,6 +1851,9 @@ out: if (changed) { + /* Cached scalar evolutions now may refer to wrong or non-existing +loops. */ + scev_reset_htab (); mark_virtual_operands_for_renaming (fun); rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); } Index: gcc/testsuite/gcc.dg/pr64406.c === --- gcc/testsuite/gcc.dg/pr64406.c (revision 0) +++ gcc/testsuite/gcc.dg/pr64406.c (working copy) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options -O -ftree-loop-distribute-patterns -fno-tree-loop-ivcanon -fno-tree-loop-vectorize -ftree-vectorize } */ + +unsigned in[72]; + +void bar (unsigned out[], unsigned ia[]); + +void +foo () +{ + int i; + unsigned out[72], ia[8]; + for (i = 0; i 8; i++) +{ + out[i * 8] = in[i * 8] + 5; + out[i * 8 + 1] = in[i * 8 + 1] + 6; + out[i * 8 + 2] = in[i * 8 + 2] + 7; + out[i * 8 + 3] = in[i * 8 + 3] + 8; + out[i * 8 + 4] = in[i * 8 + 4] + 9; + out[i * 8 + 5] = in[i * 8 + 5] + 10; + out[i * 8 + 6] = in[i * 8 + 6] + 11; + out[i * 8 + 7] = in[i * 8 + 7] + 12; + ia[i] = in[i]; +} + bar (out, ia); +}
[PATCH][ARM] PR 64149: Remove -mlra/-mno-lra option for ARM.
Hello, The LRA register alloator is enabled by default for the ARM backend and -mno-lra should no longer be used. This patch removes the -mlra/-mno-lra option from the ARM backend. arm-none-linux-gnueabihf passes gcc-check with no new failures. Matthew 2015-01-13 Matthew Wahab matthew.wa...@arm.com PR target/64149 * config/arm/arm.opt: Remove lra option and arm_lra_flag variable. * config/arm/arm.h (MODE_BASE_REG_CLASS): Remove use of arm_lra_flag, replace the conditional with it's true branch. * config/arm/arm.c (TARGET_LRA_P): Set to hook_bool_void_true. (arm_lra_p): Remove. * testsuite/gcc.target/arm/thumb1-far-jump-3.c: Remove.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..e03e063 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -103,7 +103,6 @@ struct four_ints /* Forward function declarations. */ static bool arm_const_not_ok_for_debug_p (rtx); -static bool arm_lra_p (void); static bool arm_needs_doubleword_align (machine_mode, const_tree); static int arm_compute_static_chain_stack_bytes (void); static arm_stack_offsets *arm_get_frame_offsets (void); @@ -370,7 +369,7 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address #undef TARGET_LRA_P -#define TARGET_LRA_P arm_lra_p +#define TARGET_LRA_P hook_bool_void_true #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE arm_attribute_table @@ -5932,13 +5931,6 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, } } -/* Return true if we use LRA instead of reload pass. */ -static bool -arm_lra_p (void) -{ - return arm_lra_flag; -} - /* Return true if mode/type need doubleword alignment. 
*/ static bool arm_needs_doubleword_align (machine_mode mode, const_tree type) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 556e2da..ff38017 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1287,14 +1287,10 @@ enum reg_class /* For the Thumb the high registers cannot be used as base registers when addressing quantities in QI or HI mode; if we don't know the mode, then we must be conservative. */ -#define MODE_BASE_REG_CLASS(MODE) \ - (arm_lra_flag\ - ? (TARGET_32BIT ? CORE_REGS \ - : GET_MODE_SIZE (MODE) = 4 ? BASE_REGS\ - : LO_REGS) \ - : ((TARGET_ARM || (TARGET_THUMB2 !optimize_size)) ? CORE_REGS \ - : ((MODE) == SImode) ? BASE_REGS \ - : LO_REGS)) +#define MODE_BASE_REG_CLASS(MODE)\ + (TARGET_32BIT ? CORE_REGS \ + : GET_MODE_SIZE (MODE) = 4 ? BASE_REGS \ + : LO_REGS) /* For Thumb we can not support SP+reg addressing, so we return LO_REGS instead of BASE_REGS. */ diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 5385e4a..6da49b8 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -143,10 +143,6 @@ mfpu= Target RejectNegative Joined Enum(arm_fpu) Var(arm_fpu_index) Specify the name of the target floating point hardware/format -mlra -Target Report Var(arm_lra_flag) Init(1) Save -Use LRA instead of reload (transitional) - mhard-float Target RejectNegative Alias(mfloat-abi=, hard) Undocumented diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c deleted file mode 100644 index 90559ba..000 --- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c +++ /dev/null @@ -1,108 +0,0 @@ -/* Catch reload ICE on target thumb1 with far jump optimization. - * It is also a valid case for non-thumb1 target. */ - -/* Add -mno-lra option as it is only reproducable with reload. It will - be removed after reload is completely removed. 
*/ -/* { dg-options -mno-lra -fomit-frame-pointer } */ -/* { dg-do compile } */ - -#define C 2 -#define A 4 -#define RGB (C | A) -#define GRAY (A) - -typedef unsigned long uint_32; -typedef unsigned char byte; -typedef byte* bytep; - -typedef struct ss -{ - uint_32 w; - uint_32 r; - byte c; - byte b; - byte p; -} info; - -typedef info * infop; - -void -foo(infop info, bytep row) -{ - uint_32 iw = info-w; - if (info-c == RGB) - { - if (info-b == 8) - { - bytep sp = row + info-r; - bytep dp = sp; - byte save; - uint_32 i; - - for (i = 0; i iw; i++) - { -save = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = save; - } - } - - else - { - bytep sp = row + info-r; - bytep dp = sp; - byte save[2]; - uint_32 i; - - for (i = 0; i iw; i++) - { -save[0] = *(--sp); -save[1] = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: --- gcc/config/aarch64/aarch64.md | 1 + gcc/config/arm/xgene1.md | 531 ++ 2 files changed, 532 insertions(+) create mode 100644 gcc/config/arm/xgene1.md diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 12e1054..1f6b1b6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -190,6 +190,7 @@ (include ../arm/cortex-a53.md) (include ../arm/cortex-a15.md) (include thunderx.md) +(include ../arm/xgene1.md) Can we have a ChangeLog entry please. /Marcus
Re: [PATCH] Fix up computed goto on POINTERS_EXTEND_UNSIGNED targets (PR middle-end/63974)
On Mon, Jan 12, 2015 at 12:19 PM, Jakub Jelinek ja...@redhat.com wrote: Hi! The 991213-3.c testcase ICEs on aarch64-linux with -mabi=ilp32 since wide-int merge. The problem is that x = convert_memory_address (Pmode, x) is used twice on a VOIDmode CONST_INT, which is wrong. For non-VOIDmode rtl the second convert_memory_address is a NOP, but for VOIDmode the second call treats the CONST_INT returned by the first call as if it was again ptr_mode, rather than Pmode. On aarch64-linux in particular, the constant is zero-extended from SImode to DImode in the first call, so it is not valid SImode CONST_INT any longer. emit_indirect_jump always calls convert_memory_address (Pmode, ...) on the operand in optabs.c when handling EXPAND_ADDRESS case in maybe_legitimize_operand, so the first convert_memory_address is both unnecessary and harmful. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux (which do not define POINTERS_EXTEND_UNSIGNED) and tested on the problematic testcase with aarch64-linux cross. Can anyone with easy access to POINTERS_EXTEND_UNSIGNED targets (aarch64-linux ilp32, x86_64 -mx32, ia64-hpux) please test this? Ok for trunk if it works there? 2015-01-12 Jakub Jelinek ja...@redhat.com PR middle-end/63974 * cfgexpand.c (expand_computed_goto): Don't call convert_memory_address here. --- gcc/cfgexpand.c.jj 2015-01-09 21:59:54.0 +0100 +++ gcc/cfgexpand.c 2015-01-12 14:41:35.210705174 +0100 @@ -3060,8 +3060,6 @@ expand_computed_goto (tree exp) { rtx x = expand_normal (exp); - x = convert_memory_address (Pmode, x); - do_pending_stack_adjust (); emit_indirect_jump (x); } No regressions on x32. -- H.J.
Re: [PATCH/AARCH64] Disable load/store pair peephole for volatile mem
On 10 December 2014 at 02:18, Andrew Pinski pins...@gmail.com wrote: Hi, As mentioned in https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the load/store pair peepholes currently accept volatile mem which can cause wrong code as the architecture does not define which part of the pair happens first. This patch disables the peephole for volatile mem and adds two testcases so that volatile loads are not converted into load pair (I could add the same for store pair if needed). In the second testcase, only f3 does not get converted to load pair, even though the order of the loads are different. OK? Bootstrapped and tested on aarch64-linux-gnu without any regressions. Thanks, Andrew Pinski ChangeLog: * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject volatile mems. (aarch64_operands_adjust_ok_for_ldpstp): Likewise. testsuite/ChangeLog: * gcc.target/aarch64/volatileloadpair-1.c: New testcase. * gcc.target/aarch64/volatileloadpair-2.c: New testcase. OK. Bin, Feel free to follow up with a patch to reorg the MEM_P /Marcus
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? That is a good question. We can compile gcrt1.o with -fPIC and it will work with both -pg and -pg -pie. I will open a glibc bug. Here is the updated patch without the check_profiling_available change. OK for trunk? Thanks. H.J. --- Subject: [PATCH 1/5] Add check_effective_target_pie Hi, This patch adds check_effective_target_pie to check if the current multilib generates PIE by default. Thanks. H.J. --- 2015-01-11 H.J. Lu hongjiu...@intel.com * gcc.target/i386/pie.c: New test. * lib/target-supports.exp (check_effective_target_pie): New. 
--- gcc/testsuite/gcc.target/i386/pie.c | 12 gcc/testsuite/lib/target-supports.exp | 10 ++ 2 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pie.c diff --git a/gcc/testsuite/gcc.target/i386/pie.c b/gcc/testsuite/gcc.target/i386/pie.c new file mode 100644 index 000..0a9f5ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pie.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target pie } } */ +/* { dg-options -O2 } */ + +int foo (void); + +int +main (void) +{ + return foo (); +} + +/* { dg-final { scan-assembler foo@PLT } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f5c6db8..0ac9646 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1080,6 +1080,16 @@ proc check_effective_target_nonpic { } { }] } +# Return 1 if the current multilib generates PIE by default. + +proc check_effective_target_pie { } { +return [check_no_compiler_messages pie assembly { + #ifndef __PIE__ + #error unsupported + #endif +}] +} + # Return 1 if the target does not use a status wrapper. proc check_effective_target_unwrapped { } { -- 1.9.3
Re: [PATCH 1/4] Core definition for APM XGene-1 and associated cost-table.
On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: +2014-11-19 Philipp Tomsich philipp.toms...@theobroma-systems.com + + * config/aarch64/aarch64-cores.def (xgene1): Update/add the + xgene1 (APM XGene-1) core definition. + * gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1 + * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1 + * doc/invoke.texi: Document -mcpu=xgene1. + Fix the date in the ChangeLog entry... otherwise OK commit it. Thanks /Marcus
[PATCH] Fix PRs 64493 and 64495
The following fixes a bug in outer loop reduction vectorization which happens to use a bogus vectorized stmt for the inner loop exit PHI. Bootstrap and regtest in progress on x86_64-unknown-linux-gnu. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64493 PR tree-optimization/64495 * tree-vect-loop.c (vect_finalize_reduction): For double-reductions assign the proper vectorized PHI to the inner loop exit PHIs. * gcc.dg/vect/pr64493.c: New testcase. * gcc.dg/vect/pr64495.c: Likewise. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c(revision 219520) +++ gcc/tree-vect-loop.c(working copy) @@ -4580,7 +4580,10 @@ vect_finalize_reduction: !STMT_VINFO_LIVE_P (exit_phi_vinfo)) || double_reduc); - STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; + if (double_reduc) + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; + else + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; if (!double_reduc || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) != vect_double_reduction_def) Index: gcc/testsuite/gcc.dg/vect/pr64493.c === --- gcc/testsuite/gcc.dg/vect/pr64493.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64493.c (working copy) @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include tree-vect.h + +int a, b, c, d, e, f, g, h; + +int +main () +{ + check_vect (); + + for (; a; a--) +for (d = 1; d = 0; d++) + for (; d;) + if (h) + { + if (!g) __builtin_abort (); + if (!0) __builtin_abort (); + } + + for (f = 4; f; f--) +{ + for (b = 0; b 2; b++) + c |= 1; + e |= c; +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */ Index: gcc/testsuite/gcc.dg/vect/pr64495.c === --- gcc/testsuite/gcc.dg/vect/pr64495.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64495.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include assert.h +#include tree-vect.h + +int a, b, c, d, e, f, g, i, j; +static int *h = e; + +int +main () +{ + check_vect (); + + for (; a;) +for (; g; g++) + for (; f; f++) + if (j) + { + assert(b); + assert(0); + } + for (i = 24; i; 
i--) +{ + for (c = 0; c 6; c++) + d |= 1; + *h |= d; +} + + if (e != 1) +__builtin_abort (); + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:26:39PM +0100, Richard Biener wrote: The following seems to work (for the testcase). Testing coverage of this mode will of course be bad. LGTM. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * tree-inline.c (insert_debug_decl_map): Check destination function MAY_HAVE_DEBUG_STMTS. (insert_init_debug_bind): Likewise. (insert_init_stmt): Remove redundant check. (remap_gimple_stmt): Drop debug stmts if the destination function has var-tracking assignments disabled. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. Jakub
Re: [PATCH] Fix PR64404
On Mon, 12 Jan 2015, Richard Biener wrote: I am testing the following patch to fix a latent bug in the vectorizer dealing with redundant DRs. Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Which shows the patch is bogus. Instead we are not prepared to handle this situation. Thus the following patch rejects it, making the testcase a runtime one as well. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64404 * tree-vect-stmts.c (vectorizable_load): Reject conflicting SLP types for CSEd loads. * gcc.dg/vect/pr64404.c: New testcase. Index: gcc/tree-vect-stmts.c === --- gcc/tree-vect-stmts.c (revision 219520) +++ gcc/tree-vect-stmts.c (working copy) @@ -5791,6 +5791,20 @@ vectorizable_load (gimple stmt, gimple_s group loads with negative dependence distance\n); return false; } + + /* Similarly when the stmt is a load that is both part of a SLP + instance and a loop vectorized stmt via the same-dr mechanism +we have to give up. 
*/ + if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + (STMT_SLP_TYPE (stmt_info) + != STMT_SLP_TYPE (vinfo_for_stmt +(STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +conflicting SLP types for CSEd load\n); + return false; + } } Index: gcc/testsuite/gcc.dg/vect/pr64404.c === --- gcc/testsuite/gcc.dg/vect/pr64404.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64404.c (working copy) @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-additional-options --param=sccvn-max-alias-queries-per-access=1 } */ + +#include tree-vect.h + +extern void abort (void); + +typedef struct +{ + int l, h; +} tFPinterval; + +tFPinterval X[1024]; +tFPinterval Y[1024]; +tFPinterval Z[1024]; + +void __attribute__((noinline)) +Compute (void) +{ + int d; + for (d = 0; d 1024; d++) +{ + Y[d].l = X[d].l + X[d].h; + Y[d].h = Y[d].l; + Z[d].l = X[d].l; + Z[d].h = X[d].h; +} +} + +int +main (void) +{ + int d; + + check_vect (); + + for (d = 0; d 1024; d++) +{ + X[d].l = d; + X[d].h = d + 1; + __asm__ volatile (); +} + + Compute (); + + for (d = 0; d 1024; d++) +{ + if (Y[d].l != X[d].l + X[d].h +|| Y[d].h != Y[d].l +|| Z[d].l != X[d].l +|| Z[d].h != X[d].h) + abort (); + __asm__ volatile (); +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */
[PATCH] Fix PR64415
The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. Thoughts? Thanks, Richard. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * common.opt (fvar-tracking-assignments): Remove 'Optimization' flag. (fvar-tracking-assignments-toggle): Likewise. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. Index: gcc/common.opt === --- gcc/common.opt (revision 219520) +++ gcc/common.opt (working copy) @@ -2397,13 +2397,13 @@ Perform variable tracking ; annotations. When flag_var_tracking_assignments == ; AUTODETECT_VALUE it will be set according to flag_var_tracking. fvar-tracking-assignments -Common Report Var(flag_var_tracking_assignments) Init(2) Optimization +Common Report Var(flag_var_tracking_assignments) Init(2) Perform variable tracking by annotating assignments ; Nonzero if we should toggle flag_var_tracking_assignments after ; processing options and computing its default. 
*/ fvar-tracking-assignments-toggle -Common Report Var(flag_var_tracking_assignments_toggle) Optimization +Common Report Var(flag_var_tracking_assignments_toggle) Toggle -fvar-tracking-assignments ; Positive if we should track uninitialized variables, negative if Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options -g } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +}
Re: [PATCH] rs6000: Make rs6000_split_logical handle inverted 2nd operand (PR64358)
On Mon, Jan 12, 2015 at 6:52 PM, Pat Haugen pthau...@linux.vnet.ibm.com wrote: Following backport tested on 4.8/4.9 with no new regressions. Ok to commit to those branches? -Pat 2015-01-12 Pat Haugen pthau...@us.ibm.com Backport from mainline 2014-12-20 Segher Boessenkool seg...@kernel.crashing.org PR target/64358 * config/rs6000/rs6000.c (rs6000_split_logical_inner): Swap the input operands if only the second is inverted. * config/rs6000/rs6000.md (*boolc<mode>3_internal1 for BOOL_128): Swap BOOL_REGS_OP1 and BOOL_REGS_OP2. Correct arguments to rs6000_split_logical. (*boolc<mode>3_internal2 for TI2): Swap operands[1] and operands[2]. Okay. Thanks, David
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) Andrew
[[ARM/AArch64][testsuite] 09/36] Add vsubhn, vraddhn and vrsubhn tests. Split vaddhn.c into vXXXhn.inc and vaddhn.c to share code with other new tests.
* gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vraddhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Use code from vXXXhn.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc new file mode 100644 index 000..0dbcc92 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc @@ -0,0 +1,50 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. */ +#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ +VECT_VAR(vector2, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + +#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ + TEST_VADDHN1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector1); + DECL_VARIABLE_128BITS_VARIANTS(vector2); + + clean_results (); + + /* Fill input vector1 and vector2 with arbitrary values */ + VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); + VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); + VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); + VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); + VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); + VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); + + VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); + VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, uint, u, 64, 2, 
(uint64_t)UINT32_MAX); + + TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); + TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); + TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2); + TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8); + TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4); + TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2); + + CHECK_RESULTS (TEST_MSG, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c index 58fd5ea..88c92f3 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c @@ -8,6 +8,9 @@ #include stdint.h #endif +#define INSN_NAME vaddhn +#define TEST_MSG VADDHN + /* Expected results. */ VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32 }; @@ -52,56 +55,4 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, 0x, 0x }; -#define INSN_NAME vaddhn -#define TEST_MSG VADDHN - -#define FNNAME1(NAME) exec_ ## NAME -#define FNNAME(NAME) FNNAME1(NAME) - -void FNNAME (INSN_NAME) (void) -{ - /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. 
*/ -#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ - VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ -VECT_VAR(vector2, T1, W, N)); \ - vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) - -#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ - TEST_VADDHN1(INSN, T1, T2, W, W2, N) - - DECL_VARIABLE_64BITS_VARIANTS(vector64); - DECL_VARIABLE_128BITS_VARIANTS(vector1); - DECL_VARIABLE_128BITS_VARIANTS(vector2); - - clean_results (); - - /* Fill input vector1 and vector2 with arbitrary values */ - VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); - VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); - VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); - VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); - VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); - VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); - - VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); - VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX); - - TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); - TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4);
[[ARM/AArch64][testsuite] 12/36] Add vmlal_n and vmlsl_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc new file mode 100644 index 000..a968584 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc @@ -0,0 +1,61 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V)\ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily. 
*/ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c new file mode 100644 index 000..118068c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c @@ -0,0 +1,14 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlal_n +#define TEST_MSG VMLAL_N + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 }; + +#include vmlXl_n.inc diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c new file mode 100644 index 000..a26c69f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c @@ -0,0 +1,18 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlsl_n +#define TEST_MSG VMLSL_N + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfa4b, 0xfa4c, + 0xfa4d, 0xfa4e }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf4a6, + 0xf4a7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xef01, 0xef02, +0xef03, 0xef04 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xef01, +0xef02 }; + +#include vmlXl_n.inc -- 2.1.0
[[ARM/AArch64][testsuite] 07/36] Add vmla_lane and vmls_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc new file mode 100644 index 000..b644a0e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc @@ -0,0 +1,91 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMLX_LANE(VAR)\ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. 
*/ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.3f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.8f); + + VDUP(vector3, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xBB); + VDUP(vector3, , float, f, 32, 2, 11.34f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + CHECK_RESULTS (TEST_MSG, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c new file mode 100644 index 000..f4b89d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c @@ -0,0 +1,50 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmla +#define TEST_MSG VMLA_LANE + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
[[ARM/AArch64][testsuite] 10/36] Add vmlal and vmlsl tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc new file mode 100644 index 000..1e6bab3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc @@ -0,0 +1,89 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N)\ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector3, T1, W2, N), \ +VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, 
int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 8, 8, 0x55); + VDUP(vector4, , int, s, 8, 8, 0xBB); + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 8, 8, 0x55); + VDUP(vector4, , uint, u, 8, 8, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c new file mode 100644 index 000..c147f31 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c @@ -0,0 +1,18 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlal +#define TEST_MSG VMLAL + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 0xe90a, + 0xe90b, 0xe90c, 0xe90d, 0xe90e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, +0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; + +#include vmlXl.inc diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c new file mode 100644 index 000..6c984ae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c @@ -0,0 +1,22 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include
[[ARM/AArch64][testsuite] 23/36] Add vmul_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c new file mode 100644 index 000..978cd9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c @@ -0,0 +1,104 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xace0, 0xb212 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b6, 0xc3ab }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, + 0xffd0, 0xffd4, 0xffd8, 0xffdc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfde0, 0xfe02, + 0xfe24, 0xfe46 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, +0xccd0, 0xd114, 0xd558, 0xd99c }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xace0, 0xb212, +0xb744, 0xbc76 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b6, 0xc3ab, + 0xc39f, 0xc394 }; + +#define TEST_MSG VMUL_LANE +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x22); + VDUP(vector2, , uint, u, 16, 4, 0x444); + VDUP(vector2, , uint, u, 32, 2, 0x532); + VDUP(vector2, , float, f, 32, 2, 22.8f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ); +} + +int main (void) +{ + exec_vmul_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 24/36] Add vmul_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c new file mode 100644 index 000..be0ee65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c @@ -0,0 +1,96 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfbc0, 0xfc04 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b2, 0xc3a74000 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, + 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf9a0, 0xfa06, + 0xfa6c, 0xfad2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, +0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf780, 0xf808, +0xf890, 0xf918 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, + 0xc49b9333, 0xc4907667 }; + +#define INSN_NAME vmul_n +#define TEST_MSG VMUL_N + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily. */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3f); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9f); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 25/36] Add vmull tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c new file mode 100644 index 000..3fdd51e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c @@ -0,0 +1,75 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,8) [] = { 0x100, 0xe1, 0xc4, 0xa9, + 0x90, 0x79, 0x64, 0x51 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9, +0xe890, 0xea79, 0xec64, 0xee51 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1, +0xffe400c4, 0xffe600a9 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffe00100, +0xffe200e1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 0x5501, 0x5504, 0x5505, +0x5510, 0x5511, 0x5514, 0x5515 }; + +#define TEST_MSG VMULL +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. 
*/ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + DECL_VARIABLE(vector_res, poly, 16, 8); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , poly, p, 8, 8); + + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + TEST_VMULL(poly, p, 8, 16, 8); + + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ); +} + +int main (void) +{ + exec_vmull (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 02/36] Be more verbose, and actually confirm that a test was checked.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (CHECK): Add trace. (CHECK_FP): Likewise. (CHECK_CUMULATIVE_SAT): Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 6464c66..2730a66 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -79,6 +79,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Floating-point variant. */ @@ -107,6 +108,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Clean buffer with a non-zero pattern to help diagnose buffer @@ -323,6 +325,7 @@ extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2); strlen(COMMENT) > 0 ? COMMENT : ""); \ abort(); \ } \ +fprintf(stderr, "CHECKED CUMULATIVE SAT %s\n", MSG); \ } #define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment) \ -- 2.1.0
[[ARM/AArch64][testsuite] 26/36] Add vmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c new file mode 100644 index 000..d3aa879 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c @@ -0,0 +1,66 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 }; + +#define TEST_MSG VMULL_LANE +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector2, T1, W, N), \ +L);\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2. 
*/ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + VDUP(vector2, , uint, u, 16, 4, 0x4); + VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ); +} + +int main (void) +{ + exec_vmull_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 35/36] Add vqdmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c new file mode 100644 index 000..12f2a6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c @@ -0,0 +1,94 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fff, +0x7fff }; + +#define INSN_NAME vqdmull +#define TEST_MSG VQDMULL_LANE + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N),\ + VECT_VAR(vector2, T1, W, N),\ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ +VECT_VAR(vector_res, T1, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat, ); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat, ); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 with saturation + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} -- 2.1.0