[PATCH, AArch64] Fix abitest for ilp32
Hi, Please find attached the patch that fixes abitest for ilp32. testfunc_ptr is a 32bit pointer in ILP32 but is being loaded as 64bit. Hence some of the func-ret testcases FAIL's for ILP32. Please review the patch and let us know if its okay? Regression tested on aarch64-elf. Thanks, Naveen gcc/testsuite 2015-01-15 Andrew Pinski apin...@cavium.com Naveen H.S naveen.hurugalaw...@caviumnetworks.com * gcc.target/aarch64/aapcs64/abitest.S (LABEL_TEST_FUNC_RETURN): Load testfunc_ptr as 32bit for ILP32 and 64bit for LP64.--- gcc/testsuite/ChangeLog 2015-01-14 10:00:59.524914610 +0530 +++ gcc/testsuite/ChangeLog 2015-01-14 10:21:20.928932740 +0530 @@ -1,3 +1,9 @@ +2015-01-15 Andrew Pinski apin...@cavium.com + Naveen H.S naveen.hurugalaw...@caviumnetworks.com + + * gcc.target/aarch64/aapcs64/abitest.S (LABEL_TEST_FUNC_RETURN): Load + testfunc_ptr as 32bit for ILP32 and 64bit for LP64. + 2015-01-13 David Malcolm dmalc...@redhat.com * jit.dg/harness.h (set_up_logging): Move string concatenation --- gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S 2015-01-14 09:37:46.368893934 +0530 +++ gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S 2015-01-14 10:13:08.456925431 +0530 @@ -2,6 +2,13 @@ .global myfunc .type dumpregs,%function .type myfunc,%function + +#ifdef __LP64__ +#define PTR_REG(n) x##n +#else +#define PTR_REG(n) w##n +#endif + dumpregs: myfunc: mov x16, sp @@ -48,7 +55,7 @@ myfunc: LABEL_TEST_FUNC_RETURN: adrp x9, testfunc_ptr add x9, x9, :lo12:testfunc_ptr - ldr x9, [x9, #0] + ldr PTR_REG(9), [x9, #0] blr x9// function return value test adrp x9, saved_return_address add x9, x9, :lo12:saved_return_address
Drop workaround for old binutils linker plugin bug
Hi, this workaround actually triggers bug in quite recent golds, so it seems to be good motivation to finally drop it. The bug is long fixed. Bootstrapped/regtested x86_64-linux, will commit it shortly. Honza * tree-profile.c (init_ic_make_global_vars): Drop workaround for bintuils bug 14342. (init_ic_make_global_vars): Likewise. (gimple_init_edge_profiler): Likewise. (gimple_gen_ic_func_profiler): Likewise. Index: tree-profile.c === --- tree-profile.c (revision 219571) +++ tree-profile.c (working copy) @@ -105,30 +105,15 @@ init_ic_make_global_vars (void) ptr_void = build_pointer_type (void_type_node); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. */ - if (flag_lto) -{ - ic_void_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (__gcov_indirect_call_callee_ltopriv), - ptr_void); - TREE_PUBLIC (ic_void_ptr_var) = 1; - DECL_COMMON (ic_void_ptr_var) = 1; - DECL_VISIBILITY (ic_void_ptr_var) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (ic_void_ptr_var) = true; -} - else -{ - ic_void_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier ( - (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? - __gcov_indirect_call_topn_callee : - __gcov_indirect_call_callee)), - ptr_void); - TREE_PUBLIC (ic_void_ptr_var) = 1; - DECL_EXTERNAL (ic_void_ptr_var) = 1; -} + ic_void_ptr_var += build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ( + (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? + __gcov_indirect_call_topn_callee : + __gcov_indirect_call_callee)), + ptr_void); + TREE_PUBLIC (ic_void_ptr_var) = 1; + DECL_EXTERNAL (ic_void_ptr_var) = 1; TREE_STATIC (ic_void_ptr_var) = 1; DECL_ARTIFICIAL (ic_void_ptr_var) = 1; DECL_INITIAL (ic_void_ptr_var) = NULL; @@ -138,30 +123,16 @@ init_ic_make_global_vars (void) varpool_node::finalize_decl (ic_void_ptr_var); gcov_type_ptr = build_pointer_type (get_gcov_type ()); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. 
*/ - if (flag_lto) -{ - ic_gcov_type_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (__gcov_indirect_call_counters_ltopriv), - gcov_type_ptr); - TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; - DECL_COMMON (ic_gcov_type_ptr_var) = 1; - DECL_VISIBILITY (ic_gcov_type_ptr_var) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (ic_gcov_type_ptr_var) = true; -} - else -{ - ic_gcov_type_ptr_var - = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier ( - (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? - __gcov_indirect_call_topn_counters : - __gcov_indirect_call_counters)), - gcov_type_ptr); - TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; - DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; -} + + ic_gcov_type_ptr_var += build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ( + (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE) ? + __gcov_indirect_call_topn_counters : + __gcov_indirect_call_counters)), + gcov_type_ptr); + TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; + DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; TREE_STATIC (ic_gcov_type_ptr_var) = 1; DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1; DECL_INITIAL (ic_gcov_type_ptr_var) = NULL; @@ -230,33 +201,18 @@ gimple_init_edge_profiler (void) init_ic_make_global_vars (); - /* Workaround for binutils bug 14342. Once it is fixed, remove lto path. */ - if (flag_lto) -{ - /* void (*) (gcov_type, void *) */ - ic_profiler_fn_type - = build_function_type_list (void_type_node, - gcov_type_ptr, gcov_type_node, - ptr_void, ptr_void, - NULL_TREE); - tree_indirect_call_profiler_fn - = build_fn_decl (__gcov_indirect_call_profiler, -ic_profiler_fn_type); -} - else -{ - /* void (*) (gcov_type, void *) */ - ic_profiler_fn_type - = build_function_type_list (void_type_node, - gcov_type_node, - ptr_void, -
PR 64481 (bootstrap miscompare)
Hi, in December I conditionally disabled expensive sanity checking in inliner. This triggers bootstrap miscompare because caches are getting out of sync. This patch fixes the problem found by sanity check - the node growth cache was removed from use in badness calculation by Richard a while ago, but the cache itself remained while the updating logic was dropped. This of course leads to somewhat randomish results. The other problem fixed is that in some cases we forget to walk through aliases to get into the callee. Bootstrapped/regtested x86_64-linux, committed. PR ipa/64481 * ipa-inline-analysis.c (node_growth_cache): Remove. (initialize_growth_caches): Do not initialize it. (free_growth_caches): Do not free it. (do_estimate_growth): Rename to ... (estimate_growth): ... this one; drop growth cache code. (growth_likely_positive): Always go the heuristics way. * ipa-inline.c (can_inline_edge_p): Walk through aliases. (reset_edge_caches): Do not reset node growth. (heap_edge_removal_hook): Do not maintain cache. (inline_small_functions): Likewise; strengthen sanity check. (ipa_inline): Do not maintain caches. * ipa-inline.h (node_growth_cache): Remove. (do_estimate_growth): Remove to ... (estimate_growth): this one; remove inline version. (reset_node_growth_cache): Remove. Index: ipa-inline-analysis.c === --- ipa-inline-analysis.c (revision 219571) +++ ipa-inline-analysis.c (working copy) @@ -167,7 +167,6 @@ function_summary inline_summary * *inl vecinline_edge_summary_t inline_edge_summary_vec; /* Cached node/edge growths. */ -vecint node_growth_cache; vecedge_growth_cache_entry edge_growth_cache; /* Edge predicates goes here. 
*/ @@ -1341,8 +1340,6 @@ initialize_growth_caches (void) { if (symtab-edges_max_uid) edge_growth_cache.safe_grow_cleared (symtab-edges_max_uid); - if (symtab-cgraph_max_uid) -node_growth_cache.safe_grow_cleared (symtab-cgraph_max_uid); } @@ -1352,7 +1349,6 @@ void free_growth_caches (void) { edge_growth_cache.release (); - node_growth_cache.release (); } @@ -3931,7 +3927,7 @@ do_estimate_growth_1 (struct cgraph_node /* Estimate the growth caused by inlining NODE into all callees. */ int -do_estimate_growth (struct cgraph_node *node) +estimate_growth (struct cgraph_node *node) { struct growth_data d = { node, 0, false }; struct inline_summary *info = inline_summaries-get (node); @@ -3960,12 +3956,6 @@ do_estimate_growth (struct cgraph_node * + 50) / 100; } - if (node_growth_cache.exists ()) -{ - if ((int) node_growth_cache.length () = node-uid) - node_growth_cache.safe_grow_cleared (symtab-cgraph_max_uid); - node_growth_cache[node-uid] = d.growth + (d.growth = 0); -} return d.growth; } @@ -3979,7 +3969,6 @@ bool growth_likely_positive (struct cgraph_node *node, int edge_growth ATTRIBUTE_UNUSED) { int max_callers; - int ret; struct cgraph_edge *e; gcc_checking_assert (edge_growth 0); @@ -3999,10 +3988,6 @@ growth_likely_positive (struct cgraph_no || !node-can_remove_if_no_direct_calls_p ()) return true; - /* If there is cached value, just go ahead. 
*/ - if ((int)node_growth_cache.length () node-uid - (ret = node_growth_cache[node-uid])) -return ret 0; if (!node-will_be_removed_from_program_if_no_direct_calls_p () (!DECL_COMDAT (node-decl) || !node-can_remove_if_no_direct_calls_p ())) Index: ipa-inline.c === --- ipa-inline.c(revision 219571) +++ ipa-inline.c(working copy) @@ -388,11 +388,11 @@ can_inline_edge_p (struct cgraph_edge *e else if (caller_tree != callee_tree) { if (((opt_for_fn (e-caller-decl, optimize) -opt_for_fn (e-callee-decl, optimize)) +opt_for_fn (callee-decl, optimize)) || (opt_for_fn (e-caller-decl, optimize_size) - != opt_for_fn (e-callee-decl, optimize_size))) + != opt_for_fn (callee-decl, optimize_size))) /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */ - !DECL_DISREGARD_INLINE_LIMITS (e-callee-decl)) + !DECL_DISREGARD_INLINE_LIMITS (callee-decl)) { e-inline_failed = CIF_OPTIMIZATION_MISMATCH; inlinable = false; @@ -1095,9 +1095,6 @@ reset_edge_caches (struct cgraph_node *n if (where-global.inlined_to) where = where-global.inlined_to; - /* WHERE body size has changed, the cached growth is invalid. */ - reset_node_growth_cache (where); - for (edge = where-callers; edge; edge = edge-next_caller) if (edge-inline_failed) reset_edge_growth_cache (edge); @@ -1428,8 +1425,6 @@ add_new_edges_to_heap (edge_heap_t *heap static void
Re: [PATCH 0/6, nds32] Committed: Have -mcmodel option to support varied code model of programs.
2015-01-14 6:22 GMT+08:00 Joseph Myers jos...@codesourcery.com: On Tue, 13 Jan 2015, Chung-Ju Wu wrote: To fix this issue, we are going to use -mcmodel=X options, which probably gives more flexibility to support varied code model on code generation. The -mgp-direct option now becomes meaningless and can be discarded. If you add or remove command-line options, you need to update invoke.texi accordingly. Thanks for your reminder. I will propose another patch to update documentation as soon as possible. Best regards, jasonwucj -- Joseph S. Myers jos...@codesourcery.com
[PATCH] Fix PR c++/16160
This patch fixes the above PR where it was reported that the C++ frontend does not reject the malformed class declaration struct X5; Instead of rejecting it, the FE treats this declaration as if it were a forward declaration of a template specialization, i.e. as if it were written template struct X5; First off, the FE should reject the declaration because it is malformed (not 100% sure, though). Second, since the user probably intended to have written an explicit template instantiation (as in the PR), the FE should suggest adding template before such a declaration, that is the declaration struct X5; // error + suggest adding template This patch does both these things along with adding error messages + suggestions for struct X5 { }; // error + suggest adding template and template struct X5 { }; // error + suggest replacing with template Bootstrap and regtesting in progress. Does this patch look OK for trunk? gcc/cp/ChangeLog: PR c++/16160 * parser.c (cp_parser_class_head): Identify and reject malformed template-id declarations and definitions. --- gcc/cp/parser.c | 53 +++- gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C | 2 +- gcc/testsuite/g++.dg/ext/attrib9.C | 2 +- gcc/testsuite/g++.dg/template/crash54.C | 2 +- gcc/testsuite/g++.dg/template/error55.C | 11 +++ 5 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/error55.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 3290dfa..f6dc004 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -20264,6 +20264,34 @@ cp_parser_class_head (cp_parser* parser, } virt_specifiers = cp_parser_virt_specifier_seq_opt (parser); + /* Make sure a top-level template-id declaration or definition is preceded + by template or template . 
*/ + if (template_id_p + at_namespace_scope_p () + parser-num_template_parameter_lists == 0 + !processing_explicit_instantiation) +{ + if (cp_parser_next_token_starts_class_definition_p (parser)) + { + error_at (type_start_token-location, + an explicit specialization must be preceded by + %template %); + invalid_explicit_specialization_p = true; + /* Try to recover gracefully by taking the same action that would +have been taken by cp_parser_explicit_specialization. */ + ++parser-num_template_parameter_lists; + begin_specialization (); + } + else if (cp_parser_declares_only_class_p (parser)) + { + error_at (type_start_token-location, + an explicit instantiation must be preceded by + %template%); + type = error_mark_node; + goto out; + } +} + /* If it's not a `:' or a `{' then we can't really be looking at a class-head, since a class-head only appears as part of a class-specifier. We have to detect this situation before calling @@ -20275,6 +20303,16 @@ cp_parser_class_head (cp_parser* parser, goto out; } + if (processing_explicit_instantiation) +{ + error_at (type_start_token-location, + an explicit instantiation may not have a definition); + inform (type_start_token-location, + use %template % to define an explicit specialization); + type = error_mark_node; + goto out; +} + /* At this point, we're going ahead with the class-specifier, even if some other problem occurs. */ cp_parser_commit_to_tentative_parse (parser); @@ -20346,20 +20384,7 @@ cp_parser_class_head (cp_parser* parser, num_templates = 0; } } - /* An explicit-specialization must be preceded by template . If - it is not, try to recover gracefully. */ - if (at_namespace_scope_p () - parser-num_template_parameter_lists == 0 - template_id_p) -{ - error_at (type_start_token-location, - an explicit specialization must be preceded by %template %); - invalid_explicit_specialization_p = true; - /* Take the same action that would have been taken by -cp_parser_explicit_specialization. 
*/ - ++parser-num_template_parameter_lists; - begin_specialization (); -} + /* There must be no return statements between this point and the end of this function; set type to the correct return value and use goto done; to return. */ diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C index 3dc51ee..4957ba1 100644 --- a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C +++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-9.C @@ -9,4 +9,4 @@ enum [[gnu::unused]] e; // { dg-warning already defined } struct [[gnu::unused]] B *p; // { dg-warning attributes } template class T struct A { }; -struct [[gnu::unused]] Aint; // { dg-warning attributes } +struct [[gnu::unused]]
Patch ping...
Hi, I would like to ping the patch to fix divergence between a type and its main variant introduced by C++ FE. https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01202.html Honza
Re: [PATCH/expand] PR64011 Adjust bitsize when partial overflow happen for big-endian
On 01/13/15 15:42, Joseph Myers wrote: On Tue, 13 Jan 2015, Jeff Law wrote: In many ways having the compiler or assembler spitting out an error here is preferable to silently compiling the code. That would also help explain why As usual, an error is incorrect in such a case that only has undefined behavior at runtime (but it may be compiled into an abort if the behavior is unconditionally undefined, and the abort doesn't replace anything before the undefined behavior that might have stopped the undefined behavior from occurring). You are, of course, correct. We can't error here, but we can generate a conditional warning. jeff
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 17:03, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 03:17:08PM -0700, Jeff Law wrote: And finally there is the case of non-volatile asm with memory clobber with no memory stores in between the two - the posted (safer) patch will not allow to CSE the two, while in theory we could CSE them into just one asm. I think we have to assume that CSEing them is wrong. The first may set something in memory that is read by the second. Thoughts? I agree with pretty much everything you say in the thread, except for this idea that a memory clobber reads memory. No clobber reads anything. The commit that introduced the memory clobber concept, 426b38c9 (svn 1207), by rms, has as only comment /* `memory', don't cache memory across asm */ RMS botched this and you can see it in that the scheduler was not updated at the same time. The scheduler absolutely must track if an ASM does a memory read of an arbitrary location. I'd have to dig deeper to see when this got fixed, but it was clearly botched. Many years later another pass which needs to precisely track such things came along, namely DSE. The code in DSE is actually easier to grok. First, if you look at the ASM handling in cfgexpand.c you'll find: if (j == -4) /* `memory', don't cache memory across asm */ { XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); continue; } So we generate (CLOBBER (MEM:BLK (SCRATCH))) when we see memory in the clobber list of an ASM. If you then look at dse.c we have this in record_store: /* At this point we know mem is a mem. */ if (GET_MODE (mem) == BLKmode) { if (GET_CODE (XEXP (mem, 0)) == SCRATCH) { if (dump_file (dump_flags TDF_DETAILS)) fprintf (dump_file, adding wild read for (clobber (mem:BLK (scratch))\n); add_wild_read (bb_info); insn_info-cannot_delete = true; return 0; } Which says very precisely that we treat (CLOBBER (MEM:BLK (SCRATCH))) as potentially *reading* any location. 
If you trace through how the scheduler builds dependencies, paying particular attention to alias.c you'll see that (CLOBBER (MEM:BLK (SCRATCH))) is treated as both a read and a write of an arbitrary location. It's unfortunate that RMS put the memory tag in the clobber list. But he really wasn't a compiler junkie and didn't realize the right thing to do was to have a memory tag in both the inputs and [output|clobber] section to represent a read of an arbitrary location and a write to an arbitrary location independently. But it is what it is at this point and we have to treat memory appearing in the clobber list as an arbitrary memory read and an arbitrary memory write. Jeff
[PATCH, nds32] Committed: Remove some features that are not available yet in nds32 port of GNU binutils package.
Hi, all, The nds32 target supports two features, fp-as-gp and ex9, designed for code size optimizations. They are majorly performed by linker so that compiler is merely to give some hints or directives with -mforce-fp-as-gp, -mforbid-fp-as-gp, and -mex9 options. However, those two features are not available yet in the current nds32 port of GNU binutils package. For consistency concern, I think it would be better to remove them from gcc trunk as well for now. Committed as Rev. 219576: https://gcc.gnu.org/r219576 gcc/ChangeLog 2015-01-14 Chung-Ju Wu jasonw...@gmail.com * config/nds32/nds32.opt (mforce-fp-as-gp): Remove. (mforbid-fp-as-gp): Remove. (mex9): Remove. * config/nds32/nds32-fp-as-gp.c (nds32_have_prologue_p): Remove. (nds32_symbol_load_store_p): Remove. (nds32_fp_as_gp_check_available): Clean up implementation. * config/nds32/nds32.h (LINK_SPEC): Remove -mforce-as-gp and -mex9 cases. * config/nds32/nds32.c (nds32_asm_file_start): No need to consider fp-as-gp and ex9 cases. Best regards, jasonwucj 0010-Remove-some-features-that-are-not-available-yet-in-n.patch Description: Binary data
Re: [committed] Update copyright years, part 2
Jakub Jelinek ja...@redhat.com writes: Patch too large to attach uncompressed, this has been created with update-copyright.py --this-year. Note, I had to temporarily move away gcc/jit/docs/conf.py, the python script dies on that and leaves almost all files unchanged. Thanks for doing the update. Is the patch below OK to fix the JIT thing? After this change, update-copyright.py --this-year seems to update gcc/jit correctly (including the texinfo files). Richard contrib/ * update-copyright.py (Copyright.__init__): Add a regexp for copyright = u'. (Copyright.update_copyright): Don't add a space before the year in that case. Index: contrib/update-copyright.py === --- contrib/update-copyright.py 2014-08-05 10:29:02.695491816 +0100 +++ contrib/update-copyright.py 2015-01-13 14:13:43.500812967 + @@ -183,6 +183,7 @@ class Copyright: '|[Cc]opyright\s+%s' '|[Cc]opyright\s+copy;' '|[Cc]opyright\s+@copyright{}' +'|copyright = u\'' '|@set\s+copyright[\w-]+)' # 2: the years. Include the whitespace in the year, so that @@ -363,7 +364,8 @@ class Copyright: return (False, orig_line, next_line) line = (line[:match.start (2)] -+ ' ' + canon_form + self.separator ++ ('' if intro.startswith ('copyright = ') else ' ') ++ canon_form + self.separator + line[match.end (2):]) # Use the standard (C) form.
Re: shift/extract SHIFT_COUNT_TRUNCATED combine bug
On Tue, Jan 13, 2015 at 10:51:27AM +0100, Richard Biener wrote: IMHO SHIFT_COUNT_TRUNCATED should be removed and instead backends should provide shift patterns with a (and:QI ...) for the shift amount which simply will omit that operation if suitable. Note that that catches less though, e.g. in int f(int x, int n) { return x ((2*n) 31); } without SHIFT_COUNT_TRUNCATED it will try to match an AND with 30, not with 31. Segher
[PATCH][AArch64 Intrinsics] Replace temporary assembler for vst1_lane
Nowadays, just storing the (bigendian-corrected) vector element to the address, generates exactly the same assembler for all cases except {float,int,uint}64x1_t, where st1 {v0.d}[0], [x0] becomes str d0, [x0] This is not a problem, and the change will be much better for optimization through the midend, as well as making use of previous improvements in error reporting. Also move the /* vst1q */ comment, which was a couple intrinsics too late. gcc/ChangeLog: * config/aarch64/arm_neon.h (vst1_lane_f32, vst1_lane_f64, vst1_lane_p8, vst1_lane_p16, vst1_lane_s8, vst1_lane_s16, vst1_lane_s32, vst1_lane_s64, vst1_lane_u8, vst1_lane_u16, vst1_lane_u32, vst1_lane_u64, vst1q_lane_f32, vst1q_lane_f64, vst1q_lane_p8, vst1q_lane_p16, vst1q_lane_s8, vst1q_lane_s16, vst1q_lane_s32, vst1q_lane_s64, vst1q_lane_u8, vst1q_lane_u16, vst1q_lane_u32, vst1q_lane_u64): Reimplement with pointer dereference and __aarch64_vget_lane_any. Cross-tested check-gcc on aarch64-none-elf and aarch64_be-none-elf. Ok for trunk? Cheers, Alancommit 926aec661699e52f617f16068075ef0242a43609 Author: Alan Lawrence alan.lawre...@arm.com Date: Thu Dec 11 17:29:54 2014 + Replace temporary inline assembler for vst1_lane, move /* vst1q */ comment. 
Note for (float|u?int)64x1 vectors, st1 {v0.d}[0], [x0] becomes str d0, [x0] diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 3d1bcd5..980490f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10304,272 +10304,6 @@ vrsqrtss_f32 (float32_t a, float32_t b) result; \ }) -#define vst1_lane_f32(a, b, c) \ - __extension__ \ -({ \ - float32x2_t b_ = (b);\ - float32_t * a_ = (a);\ - __asm__ (st1 {%1.s}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_f64(a, b, c) \ - __extension__ \ -({ \ - float64x1_t b_ = (b);\ - float64_t * a_ = (a);\ - __asm__ (st1 {%1.d}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_p8(a, b, c) \ - __extension__ \ -({ \ - poly8x8_t b_ = (b); \ - poly8_t * a_ = (a); \ - __asm__ (st1 {%1.b}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_p16(a, b, c) \ - __extension__ \ -({ \ - poly16x4_t b_ = (b); \ - poly16_t * a_ = (a); \ - __asm__ (st1 {%1.h}[%2],[%0] \ -: \ -: r(a_), w(b_), i(c) \ -: memory);\ - }) - -#define vst1_lane_s8(a, b, c) \ - __extension__ \ -({ \ - int8x8_t b_ = (b); \ - int8_t * a_ = (a); \ - __asm__ (st1 {%1.b}[%2],[%0] \ -: \ -
Re: [committed] Update copyright years, part 2
On Tue, Jan 13, 2015 at 05:06:35PM +, Richard Sandiford wrote: Jakub Jelinek ja...@redhat.com writes: Patch too large to attach uncompressed, this has been created with update-copyright.py --this-year. Note, I had to temporarily move away gcc/jit/docs/conf.py, the python script dies on that and leaves almost all files unchanged. Thanks for doing the update. Is the patch below OK to fix the JIT thing? After this change, update-copyright.py --this-year seems to update gcc/jit correctly (including the texinfo files). Richard contrib/ * update-copyright.py (Copyright.__init__): Add a regexp for copyright = u'. (Copyright.update_copyright): Don't add a space before the year in that case. Ok, thanks. --- contrib/update-copyright.py 2014-08-05 10:29:02.695491816 +0100 +++ contrib/update-copyright.py 2015-01-13 14:13:43.500812967 + @@ -183,6 +183,7 @@ class Copyright: '|[Cc]opyright\s+%s' '|[Cc]opyright\s+copy;' '|[Cc]opyright\s+@copyright{}' +'|copyright = u\'' '|@set\s+copyright[\w-]+)' # 2: the years. Include the whitespace in the year, so that @@ -363,7 +364,8 @@ class Copyright: return (False, orig_line, next_line) line = (line[:match.start (2)] -+ ' ' + canon_form + self.separator ++ ('' if intro.startswith ('copyright = ') else ' ') ++ canon_form + self.separator + line[match.end (2):]) # Use the standard (C) form. Jakub
Re: [PATCH] [AArch64, NEON] Improve vpmaxX vpminX intrinsics
On 09/12/14 08:17, Yangfei (Felix) wrote: On 28 November 2014 at 09:23, Yangfei (Felix) felix.y...@huawei.com wrote: Hi, This patch converts vpmaxX vpminX intrinsics to use builtin functions instead of the previous inline assembly syntax. Regtested with aarch64-linux-gnu on QEMU. Also passed the glorious testsuite of Christophe Lyon. OK for the trunk? Hi Felix, We know from experience that the advsimd intrinsics tend to be fragile for big endian and in general it is fairly easy to break the big endian case. For these advsimd improvements that you are working on (that we very much appreciate) it is important to run both little endian and big endian regressions. Thanks /Marcus Okay. Any plan for the advsimd big-endian improvement? I rebased this patch over Alan Lawrance's patch: https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00279.html No regressions for aarch64_be-linux-gnu target too. OK for the thunk? Index: gcc/ChangeLog === --- gcc/ChangeLog (revision 218464) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-12-09 Felix Yang felix.y...@huawei.com + + * config/aarch64/aarch64-simd.md (aarch64_maxmin_unspmode): New + pattern. + * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, + uminp, smax_nanp, smin_nanp): New builtins. 
+ * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, + vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, + vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, + vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, + vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, + vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, + vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, + vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, vpminnmqd_f64, + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) Index: gcc/config/aarch64/aarch64-simd.md === --- gcc/config/aarch64/aarch64-simd.md (revision 218464) +++ gcc/config/aarch64/aarch64-simd.md (working copy) @@ -1017,6 +1017,28 @@ DONE; }) +;; Pairwise Integer Max/Min operations. +(define_insn aarch64_maxmin_unspmode + [(set (match_operand:VDQ_BHSI 0 register_operand =w) + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 register_operand w) +(match_operand:VDQ_BHSI 2 register_operand w)] + MAXMINV))] + TARGET_SIMD + maxmin_uns_opp\t%0.Vtype, %1.Vtype, %2.Vtype + [(set_attr type neon_minmaxq)] +) + Hi Felix, Sorry for the delay in getting back to you on this. If you've rolled aarch64_reduc_maxmin_uns_internalv2si into the above pattern, do you still need it? For all its call points, just point them to aarch64_maxmin_unspmode? Thanks, Tejas.
Re: [PATCH] add option to emit more array bounds warnigs
Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... * gcc/tree-vrp.c (check_array_ref): Emit more warnings for warn_array_bounds = 2. * gcc/testsuite/gcc.dg/Warray-bounds-11.c: New test-case. * gcc/c-family/c.opt: New option -Warray-bounds=. * gcc/common.opt: New option -Warray-bounds=. * gcc/doc/invoke.texi: Document new option. Has this patch been bootstrapped and regression tested? If so, on what platform? x86_64-unknown-linux-gnu Given the new warnings (as implemented by the patch) are not enabled by default, I'm inclined to approve once Martin verifies things via bootstrap and regression test. Thank you, Martin
[PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Hi, The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Ok to commit? Many thanks, Andrew testsuite/ * gcc.target/mips/call-saved-4.c: Add NO_COMPRESSION attribute. * gcc.target/mips/call-saved-5.c: Likewise. * gcc.target/mips/call-saved-6.c: Likewise. diff --git a/gcc/testsuite/gcc.target/mips/call-saved-4.c b/gcc/testsuite/gcc.target/mips/call-saved-4.c index 846ea32..92881c4 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-4.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-4.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init (); diff --git a/gcc/testsuite/gcc.target/mips/call-saved-5.c b/gcc/testsuite/gcc.target/mips/call-saved-5.c index 2937b31..152b28f 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-5.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-5.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init (); diff --git a/gcc/testsuite/gcc.target/mips/call-saved-6.c b/gcc/testsuite/gcc.target/mips/call-saved-6.c index 0d1a4c8..a384d4a 100644 --- a/gcc/testsuite/gcc.target/mips/call-saved-6.c +++ b/gcc/testsuite/gcc.target/mips/call-saved-6.c @@ -3,7 +3,7 @@ void bar (void); -void +NOCOMPRESSION void foo (int x) { __builtin_unwind_init ();
RE: [PATCH] Fix for PR64081 in RTL loop unroller
Is it really sufficient here to verify that all the defs are on latch predecessors, what about the case where there is a predecessor without a def. How do you guarantee domination in that case? ISTM that given the structure for the code you're writing that you'd want to verify that in the event of multiple definitions that all of them appear on immediate predecessors of the latch *and* that each immediate predecessor has a definition. Yes, do you think it's better to check exactly immediate predecessors? - - if (!just_once_each_iteration_p (current_loop, DF_REF_BB (adef))) - return false; + { + def_num++; + if (!(def_pred_latch = def_pred_latch_p (adef)) + || !rtx_equal_p( PATTERN (DF_REF_INSN (single_rd)), Whitespace nit here. Whitespace goes before the open paren for the function call, not after. Thanks for catching this! @@ -351,10 +384,10 @@ latch_dominating_def (rtx reg, df_ref *def) static enum iv_grd_result iv_get_reaching_def (rtx_insn *insn, rtx reg, df_ref *def) And in this routine, you appear to do both checks. ie, each def is on an immediate predecessor and each immediate predecessor has a def. Is there some reason why iv_get_reaching_def has the stronger check while latch_dominating_def does not? Looks like I was sure that latch_dominating_def always goes after iv_get_reaching_def but now I see it is not true. Will add another check in latch_dominating_def. Thanks, Igor jeff
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. Maciej
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 09:18, Jakub Jelinek wrote: Hi! My PR60663 fix unfortunately stopped CSE of all inline-asms, even when they e.g. only have the clobbers added by default. This patch attempts to restore the old behavior, with the exceptions: 1) as always, asm volatile is not CSEd 2) inline-asm with multiple outputs are not CSEd 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; this can be removed by removing the int j, lim = XVECLEN (x, 0); and loop below it 4) inline-asm with clobbers is never copied into an insn that wasn't inline-asm before, so if there are clobbers, we allow CSEing of e.g. two same inline-asms, but only by reusing results of one of those Bootstrapped/regtested on x86_64-linux and i686-linux, tested also with arm cross after reverting the PR60663 arm cost fix. Ok for trunk this way, or with 3) removed? 2015-01-13 Jakub Jelinek ja...@redhat.com PR rtl-optimization/63637 PR rtl-optimization/60663 * cse.c (merge_equiv_classes): Set new_elt-cost to MAX_COST if elt-cost is MAX_COST for ASM_OPERANDS. (find_sets_in_insn): Fix up comment typo. (cse_insn): Don't set src_volatile for all non-volatile ASM_OPERANDS in PARALLELs, but just those with multiple outputs or with memory clobber. Set elt-cost to MAX_COST for ASM_OPERANDS in PARALLEL. Set src_elt-cost to MAX_COST if new_src is ASM_OPERANDS and elt-cost is MAX_COST. * gcc.dg/pr63637-1.c: New test. * gcc.dg/pr63637-2.c: New test. * gcc.dg/pr63637-3.c: New test. * gcc.dg/pr63637-4.c: New test. * gcc.dg/pr63637-5.c: New test. * gcc.dg/pr63637-6.c: New test. * gcc.target/i386/pr63637-1.c: New test. * gcc.target/i386/pr63637-2.c: New test. * gcc.target/i386/pr63637-3.c: New test. * gcc.target/i386/pr63637-4.c: New test. * gcc.target/i386/pr63637-5.c: New test. * gcc.target/i386/pr63637-6.c: New test. 
--- gcc/cse.c.jj2015-01-09 21:59:44.0 +0100 +++ gcc/cse.c 2015-01-13 13:26:23.391216064 +0100 @@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c } new_elt = insert (exp, class1, hash, mode); new_elt-in_memory = hash_arg_in_memory; + if (GET_CODE (exp) == ASM_OPERANDS elt-cost == MAX_COST) + new_elt-cost = MAX_COST; } } } @@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc { int i, lim = XVECLEN (x, 0); - /* Go over the epressions of the PARALLEL in forward order, to + /* Go over the expressions of the PARALLEL in forward order, to put them in the same order in the SETS array. */ for (i = 0; i lim; i++) { @@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn) REGNO (dest) = FIRST_PSEUDO_REGISTER) sets[i].src_volatile = 1; - /* Also do not record result of a non-volatile inline asm with -more than one result or with clobbers, we do not want CSE to -break the inline asm apart. */ else if (GET_CODE (src) == ASM_OPERANDS GET_CODE (x) == PARALLEL) - sets[i].src_volatile = 1; + { + /* Do not record result of a non-volatile inline asm with +more than one result. */ + if (n_sets 1) + sets[i].src_volatile = 1; + + int j, lim = XVECLEN (x, 0); + for (j = 0; j lim; j++) + { + rtx y = XVECEXP (x, 0, j); + /* And do not record result of a non-volatile inline asm +with memory clobber. */ + if (GET_CODE (y) == CLOBBER MEM_P (XEXP (y, 0))) Can you please add a comment here which references the full form of the memory tag. (clobber (mem:BLK (scratch))). If we ever have to look at this again (say perhaps to break out the read anything vs write anything into separate tags :-) it'll save considerable time and angst trying to track all this stuff down. The tests you've got are a step forward, but there's obviously a lot more we could do. For example testing DSE around ASMs without and without a memory clobber, testing CSE of unrelated memory references around an ASM without and without a memory clobber come to mind. 
You don't have to add them to get approval, but if you were to take the time to cobble them together it'd be hugely appreciated. Given the discussion with Segher, let's give him a chance to chime in on tonight's messages before we make a final decision. jeff
Re: [PATCH] add option to emit more array bounds warnings
On 01/13/15 17:40, Martin Uecker wrote: Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( I fixed up the ChangeLog entries and installed the patch for you. If you plan to contribute regularly, you should go ahead and apply for write access to the repository so that you'll be able to commit your own patches once they're approved. You'll also need to make sure you have an assignment on file with the FSF. That patch was pretty small (the testcase was larger than the patch itself, which I always like :-) so I didn't request an assignment. Further submissions likely will require an assignment. Thanks, jeff
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/15 15:56, Andrew MacLeod wrote: On 01/13/2015 02:06 PM, Andrew MacLeod wrote: On 01/13/2015 01:38 PM, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. 
(It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. I imagine it was probably in a previous incarnation of the standard... Most of this was actually implemented based on very early draft standards years and years ago and never revised. It wasn't put in by me unless the standard at some point said had such wording. The current standard appears to make no mention of the situation. It seems that it should be safe to move back to the original patch, and remove that error test for using consume on an exchange... Andrew Here's the original patch along with the line removed from the testcase. x86_64-unknown-linux-gnu bootstraps, no regressions, and so forth. OK for trunk? -ENOPATCH However, I can get it from the BZ and it's OK assuming you also fixup the one testcase we've discussed on this thread. Jeff
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
On 01/10/15 06:05, Richard Sandiford wrote: Sorry for the slow response. Jeff has approved the patch in the meantime, but I didn't want to go ahead and apply it while there was still disagreement... Thanks. I didn't realize there was a disagreement when I approved. Let's continue to hash this out a bit in the hopes that we can all get to a place where we're comfortable with the final change, whatever it happens to me. jeff
Re: [PATCH] [AArch64, NEON] Improve vpmaxX vpminX intrinsics
On 09/12/14 08:17, Yangfei (Felix) wrote: On 28 November 2014 at 09:23, Yangfei (Felix) felix.y...@huawei.com wrote: Hi, This patch converts vpmaxX vpminX intrinsics to use builtin functions instead of the previous inline assembly syntax. Regtested with aarch64-linux-gnu on QEMU. Also passed the glorious testsuite of Christophe Lyon. OK for the trunk? Hi Felix, We know from experience that the advsimd intrinsics tend to be fragile for big endian and in general it is fairly easy to break the big endian case. For these advsimd improvements that you are working on (that we very much appreciate) it is important to run both little endian and big endian regressions. Thanks /Marcus Okay. Any plan for the advsimd big-endian improvement? I rebased this patch over Alan Lawrance's patch: https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00279.html No regressions for aarch64_be-linux-gnu target too. OK for the trunk? Index: gcc/ChangeLog = == --- gcc/ChangeLog (revision 218464) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-12-09 Felix Yang felix.y...@huawei.com + + * config/aarch64/aarch64-simd.md (aarch64_maxmin_unspmode): New + pattern. + * config/aarch64/aarch64-simd-builtins.def (smaxp, sminp, umaxp, + uminp, smax_nanp, smin_nanp): New builtins.
+ * config/aarch64/arm_neon.h (vpmax_s8, vpmax_s16, vpmax_s32, + vpmax_u8, vpmax_u16, vpmax_u32, vpmaxq_s8, vpmaxq_s16, vpmaxq_s32, + vpmaxq_u8, vpmaxq_u16, vpmaxq_u32, vpmax_f32, vpmaxq_f32, vpmaxq_f64, + vpmaxqd_f64, vpmaxs_f32, vpmaxnm_f32, vpmaxnmq_f32, vpmaxnmq_f64, + vpmaxnmqd_f64, vpmaxnms_f32, vpmin_s8, vpmin_s16, vpmin_s32, vpmin_u8, + vpmin_u16, vpmin_u32, vpminq_s8, vpminq_s16, vpminq_s32, vpminq_u8, + vpminq_u16, vpminq_u32, vpmin_f32, vpminq_f32, vpminq_f64, vpminqd_f64, + vpmins_f32, vpminnm_f32, vpminnmq_f32, vpminnmq_f64, + vpminnmqd_f64, + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) Index: gcc/config/aarch64/aarch64-simd.md = == --- gcc/config/aarch64/aarch64-simd.md (revision 218464) +++ gcc/config/aarch64/aarch64-simd.md (working copy) @@ -1017,6 +1017,28 @@ DONE; }) +;; Pairwise Integer Max/Min operations. +(define_insn aarch64_maxmin_unspmode + [(set (match_operand:VDQ_BHSI 0 register_operand =w) + (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 register_operand w) +(match_operand:VDQ_BHSI 2 register_operand w)] + MAXMINV))] + TARGET_SIMD + maxmin_uns_opp\t%0.Vtype, %1.Vtype, %2.Vtype + [(set_attr type neon_minmaxq)] +) + Hi Felix, Sorry for the delay in getting back to you on this. If you've rolled aarch64_reduc_maxmin_uns_internalv2si into the above pattern, do you still need it? For all its call points, just point them to aarch64_maxmin_unspmode? Thanks, Tejas. Hello Tejas, I didn't do this yet. Currently the aarch64_reduc_maxmin_uns_internalv2si is only called by reduc_maxmin_uns_scal_mode. I find it kind of trouble to handle this due to the use of iterators in the caller pattern. Are you going to rework this part?
Re: [PATCH] Correct target selector in -mfentry tests
On 01/13/15 14:27, H.J. Lu wrote: -fprofile -mfentry works with PIE if gcrt1.o is compiled with -fPIC. A glibc has been filed, PR 17836, and a glibc patch has been submitted. OK for trunk? Thanks. H.J. -- * gcc.target/i386/fentry-override.c: Properly place {} in target selector. Remove nonpic. * gcc.target/i386/fentry.c: Likewise. Does this change the pass/fail result of the test on a system without an updated glibc? jeff
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Matthew Fortune wrote: I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Taking care that the default compilation mode does not conflict (e.g. MIPS16, incompatible) and taking any exceptions into account (e.g. n64, unsupported) I presume, right? mips.exp sorts that out for you. Adding -mmicromips or (-micromips) to dg-options forces (or at least is supposed to force) the overall flags to be compatible with microMIPS. The aim of mips.exp is to avoid skipping tests wherever possible. If someone runs the testsuite with -mips16 and we have a -micromips test, it's better to remove -mips16 for that test than to skip the test entirely. I was going to suggest a follow up patch to add copies of the three tests as Richard suggests. I haven't yet done a micromips run of the testsuite to check for any other issues like this but I suspect problems are limited to the tests that I recently added. Please always try to test changes reasonably, i.e. at least o32, o32/MIPS16, o32/microMIPS, n32, n64, and then Linux and ELF if applicable, plus any options that may be relevant, unless it is absolutely clear ABI/ISA variations do not matter for a change proposed. TBH this seems a bit much.
On the one hand it's more testing than you'd get for almost any other target, but on the other it leaves out important differences like MIPS I vs MIPS II vs MIPS 32, MIPS III vs MIPS IV vs MIPS64, r1 vs. r2 vs. r6, Octeon vs. Loongson vs. vanilla, DSP vs. no DSP, etc. I think we just have to accept that there are so many possible combinations that we can't test everything that's potentially relevant. I think it's more useful to be flexible than prescribe a particular list. Having everyone test the same multilib combinations on the same target isn't necessarily a good thing anyway. Diversity in testing (between developers) is useful too. Thanks, Richard
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
On 01/13/15 11:55, Eric Botcazou wrote: (1) we have a non-paradoxical subreg; (2) both (reg:ymode xregno) and (reg:xmode xregno) occupy full hard registers (no padding or unused upper bits); (3) (reg:ymode xregno) and (reg:xmode xregno) store the same number of bytes (X) in each constituent hard register; (4) the offset is a multiple of X, i.e. the data we're accessing is aligned to a register boundary; and (5) endianness is regular (no differences between words and bytes, or between registers and memory) OK, that's a nice translation of the new code. :-) It seems to me that the patch wants to extend the support of generic subregs to modes whose sizes are not multiple of each other, which is a requirement of the existing code, but does that in a very specific case for the sake of the ARM port without saying where all the above restrictions come from. Basically we're lifting the restriction that the the sizes are multiples of each other. The requirements above are the set where we know it will work. They are target independent, but happen to match what the ARM needs. The certainly do short circuit the meat of the function, that's the whole point, there's this set of conditions under which we know this will work and when they hold, we bypass. Now one could argue that instead of bypassing we should put the code to handle this situation further down. I'd be leery of doing that just from a complexity standpoint. But one could also argue that short circuiting like the patch does adds complexity as well and may be a bit kludgy. Maybe the way forward here is for someone to try and integrate this support in the main part of the code and see how it looks. Then we can pick one. The downside is since this probably isn't a regression that work would need to happen quickly to make it into gcc-5. 
Which leads to another option, get the release managers to sign off on the kludge after gcc-5 branches and only install the kludge on the gcc-5 branch and insisting the other solution go in for gcc-6 and beyond. Not sure if they'd do that, but it's a discussion that could happen. jeff
Re: [PATCH] add option to emit more array bounds warnings
Jeff Law l...@redhat.com: On 01/13/15 17:40, Martin Uecker wrote: Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( I fixed up the ChangeLog entries and installed the patch for you. Thank you, Jeff! If you plan to contribute regularly, you should go ahead and apply for write access to the repository so that you'll be able to commit your own patches once they're approved. I put a request in with you as sponsor (hope this is ok). You'll also need to make sure you have an assignment on file with the FSF. That patch was pretty small (the testcase was larger than the patch itself, which I always like :-) so I didn't request an assignment. Further submissions likely will require an assignment. I already have an assignment on file. Martin
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 01:38 PM, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. 
However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. I imagine it was probably in a previous incarnation of the standard... Most of this was actually implemented based on very early draft standards years and years ago and never revised. It wasn't put in by me unless the standard at some point said had such wording. The current standard appears to make no mention of the situation. It seems that it should be safe to move back to the original patch, and remove that error test for using consume on an exchange... Andrew
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On Tue, Jan 13, 2015 at 12:45:27PM -0700, Jeff Law wrote: On 01/13/15 09:38, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 05:18:19PM +0100, Jakub Jelinek wrote: 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; As extend.texi used to say: If your assembler instructions access memory in an unpredictable fashion, add @samp{memory} to the list of clobbered registers. This causes GCC to not keep memory values cached in registers across the assembler instruction and not optimize stores or loads to that memory. You also should add the @code{volatile} keyword if the memory affected is not listed in the inputs or outputs of the @code{asm}, as the @samp{memory} clobber does not count as a side-effect of the @code{asm}. so a memory clobber in a non-volatile asm should not prevent CSE. My reading of that paragraph is somewhat different. The key here is the memory clobber affects optimization of instructions around the asm while the volatile specifier affects the optimization of the ASM itself. A memory clobber must inhibit CSE of memory references on either side of the asm because the asm must be assumed to read or write memory in unpredictable ways. The volatile specifier tells the compiler that the asm itself must be preserved, even if dataflow shows the outputs as not used. That is not necessarily in conflict. My reading of Jeff's comment is that in int a; int foo (void) { int b, c, d, e; b = a; asm (... : =r (c) : : memory); d = a; asm (... : =r (e) : : memory); return b + d + 2 * (c + e); } we are not allowed to CSE d = a; into d = b;. CSE invalidate_from_clobbers should ensure that already, even when we don't do anything special about memory clobber in the patch. 
Another thing is if there is a store in between the two non-volatile asms with memory clobber, here I'm not sure if with the alternate patch we'd treat the memory clobber as use of everything previously stored into memory (in this regard the posted version is safe). And finally there is the case of non-volatile asm with memory clobber with no memory stores in between the two - the posted (safer) patch will not allow to CSE the two, while in theory we could CSE them into just one asm. Jakub
Re: PR54442 build_qualified_type produces a non-canonical type
Hi, On 06/09/2014 04:46 PM, Jason Merrill wrote: On 06/09/2014 10:32 AM, Marc Glisse wrote: On Mon, 9 Jun 2014, Jason Merrill wrote: On 06/09/2014 10:18 AM, Marc Glisse wrote: I doubt the patch can be wrong, but it may be that this is a situation that is not supposed to happen and should be fixed elsewhere? Seems likely. What is the difference between the type returned from build_qualified_type (TYPE_CANONICAL and it's TYPE_CANONICAL? I would expect them to be the same. throws tree_list 0x7660e5c8 purpose integer_cst 0x764d6ba0 constant 1 (in what build_qualified_type returns) I guess that makes sense, given that the exception specification isn't really part of the type. The patch is OK. In fact, I noticed today that this is a 4.8/4.9 Regression too. Shall I try to apply the patchlet to 4_9-branch too and, if testing passes, commit there and close the bug? Thanks, Paolo.
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/15 11:38, Torvald Riegel wrote: On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. 
However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_excahnge I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that. AFAICT that test has been there since the initial commit of sync-mem-invalid.c (which was later renamed to atomic-invalid). In fact, that was the only test initially in sync-mem-invalid.c commit 64d1dbf10e3f08305f4a8569e27fc2224f9074d2 Author: amacleod amacleod@138bc75d-0d04-0410-961f-82ee72b054a4 Date: Thu Jun 23 13:09:31 2011 + Basica tests for __sync_mem_exchange and framework for further additions. * lib/target-support.exp (check_effective_target_sync_int_128, check_effective_target_sync_long_long): Check whether the target supports 64 and 128 bit __sync builtins. * gcc.dg/sync-mem.h: New. Common code to check memory model __syncs. * gcc.dg/sync-mem-1.c: New. Check char size. * gcc.dg/sync-mem-2.c: New. Check short size. * gcc.dg/sync-mem-3.c: New. Check int size. * gcc.dg/sync-mem-4.c: New. Check long long. * gcc.dg/sync-mem-5.c: New. Check 128 bit. * gcc.dg/sync-mem-invalid.c: New. Check invalid memory modes. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/cxx-mem-model@175331 138bc75d-0d04-0410-961f-82ee72b054a4 Mostly hoping this refreshes Andrew's memory and he can provide some insight on why we test this particular combination and consider it invalid. I was kind of hoping that we'd track this down to something like a particular target didn't support this capability with the old sync builtins and we carried it into the atomics when we made that switch. I don't have a vested interest in either approach. I just want to see us DTRT. jeff
Re: [PATCH] Fix ICE with -fgnu-tm and pragma ivdep (PR middle-end/64391)
On 01/13/15 09:28, Marek Polacek wrote: We ICE on this testcase, because the usage of #pragma GCC ivdep pulls in the ANNOTATE internal functions which don't have underlying fndecls, hence we segv on a NULL_TREE. This patch makes get_attrs_for be prepared for such a scenario. The callers of get_attrs_for already check for NULL_TREE. I don't think internal fns can have transaction_* attributes anyway. While at it, I did some cleanups. Bootstrapped/regtested on {ppc64,x86_64}-linux, ok for trunk? 2015-01-13 Marek Polacek pola...@redhat.com PR middle-end/64391 * trans-mem.c (get_attrs_for): Return NULL_TREE if X is NULL_TREE. * gcc.dg/tm/pr64391.c: New test. OK. I looked briefly at perhaps catching this earlier in the call chain, but your approach looks best to me. Thanks, Jeff
Re: [testsuite] PATCH: Add check_effective_target_pie
On 01/13/15 05:52, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be built with -fPIC else you get an error like that one above and it is the same problems when you build bin with -fPIE and link with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? That is a good question. We can compile gcrt1.o with -fPIC and it will work with both -pg and -pg -pie. I will open a glibc bug. Thanks for getting the bug opened, there's a reasonable chance that we'll have the gcrt1.o we want in the not too distant future. Here is the updated patch without the check_profiling_available change. OK for trunk? Thanks. H.J. --- Subject: [PATCH 1/5] Add check_effective_target_pie Hi, This patch adds check_effective_target_pie to check if the current multilib generates PIE by default. Thanks. H.J. --- 2015-01-11 H.J. Lu hongjiu...@intel.com * gcc.target/i386/pie.c: New test. * lib/target-supports.exp (check_effective_target_pie): New. OK. Jeff
Re: [testsuite] PATCH: Check if -pg available
On 01/13/15 05:54, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? OK with a suitable ChangeLog entry. jeff
Re: [PATCH][rtlanal.c][BE][1/2] Fix vector load/stores to not use ld1/st1
Sorry for the slow response. Jeff has approved the patch in the meantime, but I didn't want to go ahead and apply it while there was still disagreement... I still think that it isn't appropriate to short-circuit the main computation as the patch does, but I don't want to block it after Jeff's approval. (1) we have a non-paradoxical subreg; (2) both (reg:ymode xregno) and (reg:xmode xregno) occupy full hard registers (no padding or unused upper bits); (3) (reg:ymode xregno) and (reg:xmode xregno) store the same number of bytes (X) in each constituent hard register; (4) the offset is a multiple of X, i.e. the data we're accessing is aligned to a register boundary; and (5) endianness is regular (no differences between words and bytes, or between registers and memory) OK, that's a nice translation of the new code. :-) It seems to me that the patch wants to extend the support of generic subregs to modes whose sizes are not multiple of each other, which is a requirement of the existing code, but does that in a very specific case for the sake of the ARM port without saying where all the above restrictions come from. -- Eric Botcazou
Re: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Wherever possible gcc.target/mips should not have conditional dg-finals. Thanks, Richard
RE: [PATCH] Allow MIPS call-saved-{4-6}.c tests to correctly run for micromips
Richard Sandiford rdsandif...@googlemail.com writes: Maciej W. Rozycki ma...@linux-mips.org writes: On Tue, 13 Jan 2015, Andrew Bennett wrote: The call-saved-{4-6}.c tests in the mips testsuite fail for micromips. The reason is that micromips uses the swm and lwm instructions to save/restore the call-saved registers rather than using the sw and lw instructions. The swm and lwm instructions only list the range of registers to use ie. $16-$25 and hence some of the scan-assembler patterns fail. This fix adds the NO_COMPRESSION attribute to the foo function to force the tests to always compile as mips. I have tested this for both mips and micromips, and the tests now pass successfully. The ChangeLog and patch are below. Hmm, instead of trying to avoid testing microMIPS code generation just to satisfy the test suite I'd rather see the test cases updated so that LWM/SWM register ranges are expected and accepted whenever microMIPS code is produced. These scan patterns can be made conditional. FWIW I think Andrew's patch is correct. If we want to test microMIPS output against micromips-specific regexps, we should add a separate test that forces micromips, so that it gets tested regardless of people's RUNTESTFLAGS. Doing that shouldn't hold up Andrew's patch though. Whereever possible gcc.target/mips should not have conditional dg- finals. I was going to suggest a follow up patch to add copies of the three tests as Richard suggests. I haven't yet done a micromips run of the testsuite to check for any other issues like this but I suspect problems are limited to the tests that I recently added. I certainly agree that we shouldn't just ignore micromips expected output given it is pretty easy to test. Please go ahead and commit this patch so we clean up the test results for GCC 5 in case you (or anyone else) doesn't get to submitting the extra test cases before we hit stage 4. Thanks, Matthew
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, 2015-01-13 at 10:11 -0500, Andrew MacLeod wrote: On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) (This is this test: gcc/testsuite/gcc.dg/atomic-invalid.c) The documentation of the atomic builtins also disallows mo_consume on atomic_exchange. However, I don't see any such requirement in C11 or C++14 (and I'd be surprised to see it in C++11). 
It would be surprising also because for other atomic read-modify-write operations (eg, fetch_add), we don't make such a requirement in the builtins docs -- and atomic_exchange is just a read-modify-write with a noop, basically. Does anyone remember why this requirement for no consume on exchange was added, or sees a reason to keep it? If not, I think we should drop it. This would solve the testsuite failure for Andrew. Dropping it would prevent GCC from checking the consume-on-success / acquire-on-failure case for compare_exchange I mentioned previously, but I think that this is pretty harmless. I could imagine that, for some reason, either backends or libatomic do not implement consume on atomic_exchange just because the docs disallowed it -- but I haven't checked that.
Re: [PATCH] Fix for PR64081 in RTL loop unroller
On 01/13/15 11:01, Zamyatin, Igor wrote: Is it really sufficient here to verify that all the defs are on latch predecessors, what about the case where there is a predecessor without a def. How do you guarantee domination in that case? ISTM that given the structure for the code you're writing that you'd want to verify that in the event of multiple definitions that all of them appear on immediate predecessors of the latch *and* that each immediate predecessor has a definition. Yes, do you think it's better to check exactly immediate predecessors? I'd use the same structure that you have in iv_get_reaching_def. If there was a reasonable way to factor that test into a single function and call it from both places that would be even better. Jeff
Re: [PATCH] add option to emit more array bounds warnings
On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... * gcc/tree-vrp.c (check_array_ref): Emit more warnings for warn_array_bounds = 2. * gcc/testsuite/gcc.dg/Warray-bounds-11.c: New test-case. * gcc/c-family/c.opt: New option -Warray-bounds=. * gcc/common.opt: New option -Warray-bounds=. * gcc/doc/invoke.texi: Document new option. Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. Thanks, Jeff
Re: [PATCH] Fix REE for vector modes (PR rtl-optimization/64286, take 2)
On 01/13/15 09:11, Jakub Jelinek wrote: On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote: On 01/12/15 12:59, Jakub Jelinek wrote: Hi! As mentioned in the PR, giving up for all vector mode extensions is unnecessary, but unlike scalar integer extensions, where the low part of the extended value is the original value, for vectors this is not true, thus the old value is lost. Which means we can perform REE, but only if all uses of the definition are the same (code+mode) extension. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-12 Jakub Jelinek ja...@redhat.com PR rtl-optimization/64286 * ree.c (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. Does it make sense to remove your change for 59754 in combine_reaching_defs? Shouldn't this patch handle that case as well? You're right, this patch handles that too. New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-01-13 Jakub Jelinek ja...@redhat.com PR rtl-optimization/64286 * ree.c (combine_reaching_defs): Move part of comment earlier, remove !SCALAR_INT_MODE_P check. (add_removable_extension): Don't add vector mode extensions if all uses of the source register aren't the same vector extensions. * gcc.target/i386/avx2-pr64286.c: New test. OK. Thanks for taking care of this. I can't seem to find time for doing any real debugging or bugfixing. jeff
Re: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
On 01/13/15 09:38, Segher Boessenkool wrote: On Tue, Jan 13, 2015 at 05:18:19PM +0100, Jakub Jelinek wrote: 3) on request from Richard (which Segher on IRC argues against), memory clobber also prevents CSE; As extend.texi used to say: If your assembler instructions access memory in an unpredictable fashion, add @samp{memory} to the list of clobbered registers. This causes GCC to not keep memory values cached in registers across the assembler instruction and not optimize stores or loads to that memory. You also should add the @code{volatile} keyword if the memory affected is not listed in the inputs or outputs of the @code{asm}, as the @samp{memory} clobber does not count as a side-effect of the @code{asm}. so a memory clobber in a non-volatile asm should not prevent CSE. My reading of that paragraph is somewhat different. The key here is the memory clobber affects optimization of instructions around the asm while the volatile specifier affects the optimization of the ASM itself. A memory clobber must inhibit CSE of memory references on either side of the asm because the asm must be assumed to read or write memory in unpredictable ways. The volatile specifier tells the compiler that the asm itself must be preserved, even if dataflow shows the outputs as not used. Jeff
Re: [PATCH] add option to emit more array bounds warnings
Jeff Law l...@redhat.com: On 01/13/15 10:34, Martin Uecker wrote: Mon, 12 Jan 2015 11:00:44 -0700 Jeff Law l...@redhat.com: On 11/11/14 23:13, Martin Uecker wrote: ... Has this patch been bootstrapped and regression tested, if so on what platform. x86_64-unknown-linux-gnu Approved. Please install on the trunk. Sorry about the delays. I don't have write access ;-( Martin
Re: Housekeeping work in backends.html
Eric Botcazou ebotca...@adacore.com writes: Some ports are missing (lm32, moxie, nios2, nvptx, rl78, rx) so the relevant maintainers are CCed (see 6.3.9 Anatomy of a Target Back End in the doc). I think I got this right | Characteristics Target | HMSLQNFICBD lqrcpfgmbdiates ---+ moxie | F g ds AG
Re: [patch 1/2][ARM]: New CPU support for Marvell Whitney
On 09/01/2015 19:22, Kyrill Tkachov wrote: Hi Xingxing, On 19/12/14 11:01, Xingxing Pan wrote: +/* Return true if vector element size is byte. */ Minor nit: two spaces after full stop and before */ Same in other places in the patch. +bool +marvell_whitney_vector_element_size_is_byte (rtx insn) +{ + if (GET_CODE (PATTERN (insn)) == SET) +{ + if ((GET_MODE (SET_DEST (PATTERN (insn))) == V8QImode) || + (GET_MODE (SET_DEST (PATTERN (insn))) == V16QImode)) + return true; +} + + return false; +} I see this is called from inside marvell-whitney.md. It seems to me that this function takes RTX insns. Can the type of this be strengthened to rtx_insn * ? Also, this should be refactored and written a bit more generally by checking for VECTOR_MODE_P and then GET_MODE_INNER for QImode, saving you the trouble of enumerating the different vector QI modes. + +/* Return true if INSN has shift operation but is not a shift insn. */ +bool +marvell_whitney_non_shift_with_shift_operand (rtx insn) Similar comment. Can this be strengthened to rtx_insn * ? Thanks, Kyrill +{ + rtx pat = PATTERN (insn); + + if (GET_CODE (pat) != SET) +return false; + + /* Is not a shift insn. */ + rtx rvalue = SET_SRC (pat); + RTX_CODE code = GET_CODE (rvalue); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return false; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, rvalue, ALL) +{ + /* Has shift operation. */ + RTX_CODE code = GET_CODE (*iter); + if (code == ASHIFT || code == ASHIFTRT + || code == LSHIFTRT || code == ROTATERT) +return true; +} + + return false; +} Hi Kyrill, Thanks for advice. Refactored patch is attached. -- Regards, Xingxing commit 3627056607b1e8604ac8d85ed44fdc7d3209cd3e Author: Xingxing Pan xxing...@marvell.com Date: Thu Dec 18 16:58:05 2014 +0800 2015-01-13 Xingxing Pan xxing...@marvell.com * config/arm/arm-cores.def: Add new core marvell-whitney. * config/arm/arm-protos.h: (marvell_whitney_vector_mode_qi): Declare. 
(marvell_whitney_inner_shift): Ditto. * config/arm/arm-tables.opt: Regenerated. * config/arm/arm-tune.md: Regenerated. * config/arm/arm.c (arm_marvell_whitney_tune): New structure. (arm_issue_rate): Add marvell_whitney. (marvell_whitney_vector_mode_qi): New function. (marvell_whitney_inner_shift): Ditto. * config/arm/arm.md: Include marvell-whitney.md. (generic_sched): Add marvell_whitney. (generic_vfp): Ditto. * config/arm/bpabi.h (BE8_LINK_SPEC): Add marvell-whitney. * config/arm/t-arm (MD_INCLUDES): Add marvell-whitney.md. * config/arm/marvell-whitney.md: New file. * doc/invoke.texi: Document marvell-whitney. diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index 6fa5d99..26eb7ab 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -159,6 +159,7 @@ ARM_CORE(cortex-m7, cortexm7, cortexm7, 7EM, FL_LDSCHED, cortex_m7) ARM_CORE(cortex-m4, cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) ARM_CORE(cortex-m3, cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) ARM_CORE(marvell-pj4, marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) +ARM_CORE(marvell-whitney, marvell_whitney, marvell_whitney, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, marvell_whitney) /* V7 big.LITTLE implementations */ ARM_CORE(cortex-a15.cortex-a7, cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index fc45348..45001ae 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -231,6 +231,9 @@ extern void arm_order_regs_for_local_alloc (void); extern int arm_max_conditional_execute (); +extern bool marvell_whitney_vector_mode_qi (rtx_insn *insn); +extern bool marvell_whitney_inner_shift (rtx_insn *insn); + /* Vectorizer cost model implementation. 
*/ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..dc5f364 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -298,6 +298,9 @@ EnumValue Enum(processor_type) String(marvell-pj4) Value(marvell_pj4) EnumValue +Enum(processor_type) String(marvell-whitney) Value(marvell_whitney) + +EnumValue Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..c73c33c 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -31,6 +31,7 @@ cortexa15,cortexa17,cortexr4, cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, - marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53 +
Re: [PATCH 4/4] Wire X-Gene 1 up in the ARM (32bit) backend as a AArch32-capable core.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/ChangeLog-2014| 10 ++ gcc/config/arm/arm-cores.def | 1 + gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md| 3 ++- gcc/config/arm/arm.c | 22 ++ gcc/config/arm/arm.md | 11 +-- gcc/config/arm/bpabi.h| 2 ++ gcc/config/arm/t-arm | 1 + gcc/doc/invoke.texi | 3 ++- 9 files changed, 52 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog-2014 b/gcc/ChangeLog-2014 index dd49d7f..c3c62db 100644 --- a/gcc/ChangeLog-2014 +++ b/gcc/ChangeLog-2014 @@ -3497,6 +3497,16 @@ 63965. * config/rs6000/rs6000.c: Likewise. +2014-12-23 Philipp Tomsich philipp.toms...@theobroma-systems.com + + * config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list. + Include xgene1.md. + * config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1. + * config/arm/arm-cores.def (xgene1): New entry. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1. + 2014-11-22 Jan Hubicka hubi...@ucw.cz PR ipa/63671 diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index be125ac..fa13eb9 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -167,6 +167,7 @@ ARM_CORE(cortex-a17.cortex-a7, cortexa17cortexa7, cortexa7, 7A, FL_LDSCHED | /* V8 Architecture Processors */ ARM_CORE(cortex-a53, cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) ARM_CORE(cortex-a57, cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) +ARM_CORE(xgene1, xgene1,xgene1, 8A, FL_LDSCHED, xgene1) /* V8 big.LITTLE implementations */ ARM_CORE(cortex-a57.cortex-a53, cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ece9d5e..1392429 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -310,6 +310,9 @@ EnumValue Enum(processor_type) String(cortex-a57) Value(cortexa57) EnumValue +Enum(processor_type) 
String(xgene1) Value(xgene1) + +EnumValue Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) Enum diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 452820ab..dcd5054 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -32,5 +32,6 @@ cortexr4f,cortexr5,cortexr7, cortexm7,cortexm4,cortexm3, marvell_pj4,cortexa15cortexa7,cortexa17cortexa7, - cortexa53,cortexa57,cortexa57cortexa53 + cortexa53,cortexa57,xgene1, + cortexa57cortexa53 (const (symbol_ref ((enum attr_tune) arm_tune diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..14c8a87 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1903,6 +1903,25 @@ const struct tune_params arm_cortex_a57_tune = ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */ }; +const struct tune_params arm_xgene1_tune = +{ + arm_9e_rtx_costs, + xgene1_extra_costs, + NULL,/* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true,/* Prefer LDRD/STRD. */ + {true, true},/* Prefer non short circuit. */ + arm_default_vec_cost, /* Vectorizer costs. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true, /* Prefer 32-bit encodings. */ + false, /* Prefer Neon for stringops. */ + 32 /* Maximum insns to inline memset. */ +}; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ @@ -27066,6 +27085,9 @@ arm_issue_rate (void) { switch (arm_tune) { +case xgene1: + return 4; + case cortexa15: case cortexa57: return 3; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index c61057f..a3cbf3b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -109,6 +109,11 @@ ;; given instruction does not shift one of its input operands. 
(define_attr shift (const_int 0)) +;; [For compatibility with AArch64 in pipeline models] +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr fp no,yes (const_string no)) + ; Floating Point
Re: [RFC PATCH Fortran] make enum_9/10.f90 testcases work under FreeBSD ARM
On Sun, Jan 11, 2015 at 9:55 PM, Andreas Tobler andreast-l...@fgznet.ch wrote: Hi, I have here a possible way to make the enum_9.f90 and the enum_10.f90 work under arm*-*-freebsd*. The solution for enum_9.f90 is straight forward. But the one for enum_10.f90 requires a reordering of the dg-additional-sources line. This I do not understand yet, but maybe one of you does. If I have the original ordering and change the dg-options to check on 'target arm_eabi' I get strange compilation errors: Any patches that convert arm*-*-linux* to arm_eabi can be considered as obvious as long as you test them on an EABI compliant platform which freebsd appears to be getting towards. I haven't looked too deeply about the other ordering issue you mention here. regards Ramana --- /build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../gfortran -B/build/gcc/obj_gcc_armv6/gcc/testsuite/gfortran/../../ -B/build/gcc/obj_gcc_armv6/armv6-unknown-freebsd11.0/./libgfortran/ -fno-diagnostics-show-caret -fdiagnostics-color=never ./enum_10.c -c -o arm_eabi89728.o arm_eabi89728.c^M gfortran: fatal error: cannot specify -o with -c, -S or -E with multiple files^M compilation terminated.^M --- The -c comes after the enum_10.c Attached the solution which makes the tests pass. I tested them under FreeBSD armv6-*-freebsd11.0 and x86_64-unknown-freebsd11.0. Also under x86_64-unknown-linux-gnu. All PASS. Would this be ok for trunk? TIA, Andreas 2015-01-11 Andreas Tobler andre...@gcc.gnu.org * gfortran.dg/enum_9.f90: Replace arm*-*-linux* with arm_eabi. * gfortran.dg/enum_10.f90: Likewise. Reorder dg-additional-sources.
Re: [PATCH 3/4] Change the type of the prefetch-instructions to 'prefetch'.
On 12/01/15 20:15, Philipp Tomsich wrote: --- gcc/config/aarch64/aarch64.md | 2 +- gcc/config/arm/types.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 1f6b1b6..98f4f30 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -391,7 +391,7 @@ return pftype[INTVAL(operands[1])][locality]; } - [(set_attr type load1)] + [(set_attr type prefetch)] ) (define_insn trap diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index d368446..088c21a 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -118,6 +118,7 @@ ; mvn_shift_reg inverting move instruction, shifted operand by a register. ; no_insnan insn which does not represent an instruction in the ;final output, thus having no impact on scheduling. +; prefetch a prefetch instruction ; rbit reverse bits. ; revreverse bytes. ; sdiv signed division. @@ -556,6 +557,7 @@ call,\ clz,\ no_insn,\ + prefetch,\ csel,\ crc,\ extend,\ Can you follow up with a patch that adjusts the prefetch insn attribute in the ARM backend and uses this consistently in all the pipeline descriptions (i.e. treat this equivalent to load1 and make sure the compiler builds for AArch32 afterwards) ? It would be complete to do the same for all the pipeline descriptions in the AArch64 backend too. Ramana
Re: [PATCH, aarch64] Add prefetch support
On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Yes that sounds ok to me. I was going to submit an update to thunderx.md file this week anyways. Thanks, Andrew Cheers /Marcus
Re: [PATCH, aarch64] Add prefetch support
Great. I should have an update patch-set ready tested later tonight. Best, Phil. On 13 Jan 2015, at 15:18, Andrew Pinski pins...@gmail.com wrote: On Tue, Jan 13, 2015 at 6:13 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Yes that sounds ok to me. I was going to submit an update to thunderx.md file this week anyways. Thanks, Andrew Cheers /Marcus
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 13/01/15 13:46, Marcus Shawcroft wrote: On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: --- gcc/config/aarch64/aarch64.md | 1 + gcc/config/arm/xgene1.md | 531 ++ 2 files changed, 532 insertions(+) create mode 100644 gcc/config/arm/xgene1.md diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 12e1054..1f6b1b6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -190,6 +190,7 @@ (include ../arm/cortex-a53.md) (include ../arm/cortex-a15.md) (include thunderx.md) +(include ../arm/xgene1.md Can we have a ChangeLog entry please. /Marcus And please update the copyright years in xgene1.md. R.
Re: [PATCH, aarch64] Add prefetch support
On 11 January 2015 at 02:37, Andrew Pinski pins...@gmail.com wrote: On Tue, Nov 11, 2014 at 6:47 AM, Marcus Shawcroft marcus.shawcr...@gmail.com wrote: On 30 October 2014 08:54, Gopalasubramanian, Ganesh ganesh.gopalasubraman...@amd.com wrote: 2014-10-30 Ganesh Gopalasubramanian ganesh.gopalasubraman...@amd.com Check the whitespace in your ChangeLog line. * config/arm/types.md (define_attr type): Add prefetch. The existing schedulers use 'load1'. We can of course split that into two introducing prefetch and update all of the existing schedulers to reflect the change. However I suggest we do that as a separate activity when someone actually needs the distinction, note this change will require updating the schedulers for both ARM and AArch64 backends not just those relevant to AArch64. For this prefetch patch I suggest we go with the existing load1. I will need this change for ThunderX schedule. The Pref instruction is single issued while load1 can be dual issued. Hi https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00802.html Philipp when you deal with Ramana's request above to split load1-load1/prefetch in the existing schedulers I suggest you also split it in aarch64/thunderx.md in order to retain existing behaviour. Andrew can then follow up add the right behaviour when he is ready. Andrew OK ? Cheers /Marcus
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? I opened a glibc bug: https://sourceware.org/bugzilla/show_bug.cgi?id=17836 and submitted a patch: https://sourceware.org/ml/libc-alpha/2015-01/msg00284.html H.J.
[PATCH] PR59448 - Promote consume to acquire
Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Andrew * builtins.c (memmodel_consume_fix) : New. Promote consume to acquire. (expand_builtin_atomic_exchange, expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load, expand_builtin_atomic_fetch_op, expand_builtin_atomic_clear, expand_builtin_atomic_test_and_set, expand_builtin_atomic_thread_fence, expand_builtin_atomic_signal_fence): Call memmodel_consume_fix. Index: builtins.c === *** builtins.c (revision 219462) --- builtins.c (working copy) *** get_memmodel (tree exp) *** 5368,5373 --- 5368,5382 return (enum memmodel) val; } + /* Workaround for Bugzilla 59448. GCC doesn't track consume properly, so +be conservative and promote consume to acquire. 
*/ + static void + memmodel_consume_fix (enum memmodel val) + { + if (val == MEMMODEL_CONSUME) + val = MEMMODEL_ACQUIRE; + } + /* Expand the __atomic_exchange intrinsic: TYPE __atomic_exchange (TYPE *object, TYPE desired, enum memmodel) EXP is the CALL_EXPR. *** expand_builtin_atomic_exchange (machine_ *** 5389,5394 --- 5398,5405 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_compare_exchange ( *** 5434,5439 --- 5445,5453 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (success); + memmodel_consume_fix (failure); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_load (machine_mode *** 5493,5498 --- 5507,5514 if (!flag_inline_atomics) return NULL_RTX; + memmodel_consume_fix (model); + /* Expand the operand. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); *** expand_builtin_atomic_fetch_op (machine_ *** 5553,5558 --- 5569,5576 model = get_memmodel (CALL_EXPR_ARG (exp, 2)); + memmodel_consume_fix (model); + /* Expand the operands. */ mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode); *** expand_builtin_atomic_clear (tree exp) *** 5627,5632 --- 5645,5652 return const0_rtx; } + memmodel_consume_fix (model); + if (HAVE_atomic_clear) { emit_insn (gen_atomic_clear (mem, model)); *** expand_builtin_atomic_test_and_set (tree *** 5658,5664 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! 
return expand_atomic_test_and_set (target, mem, model); } --- 5678,5684 mode = mode_for_size (BOOL_TYPE_SIZE, MODE_INT, 0); mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode); model = get_memmodel (CALL_EXPR_ARG (exp, 1)); ! memmodel_consume_fix (model); return expand_atomic_test_and_set (target, mem, model); } *** static void *** 5797,5802 --- 5817,5823 expand_builtin_atomic_thread_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model); expand_mem_thread_fence (model); } *** static void *** 5808,5813 --- 5829,5835 expand_builtin_atomic_signal_fence (tree exp) { enum memmodel model = get_memmodel (CALL_EXPR_ARG (exp, 0)); + memmodel_consume_fix (model);
[patch] libstdc++/64571 export fstream functions using new std::string
The existing tests for these functions are compile-only so didn't catch that I forgot to export these new symbols. I'll add a better test next week. Tested x86_64-linux, committed to trunk. commit d428e75af04d995451a917ef7c9caed6b8cee737 Author: Jonathan Wakely jwak...@redhat.com Date: Tue Jan 13 14:27:34 2015 + PR libstdc++/64571 * config/abi/pre/gnu.ver: Export fstream functions using new string. diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 7bb65e9..700da18 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1648,6 +1648,13 @@ GLIBCXX_3.4.21 { _ZStlsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_ostreamIT_T0_ES7_RKNSt7__cxx1112basic_string*; _ZStrsI[cw]St11char_traitsI[cw]ESaI[cw]EERSt13basic_istreamIT_T0_ES7_RNSt7__cxx1112basic_string*; +# fstream functions taking ABI-tagged std::string +_ZNSt13basic_filebufI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt13basic_fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EEC1ERKNSt7__cxx1112basic_string*; +_ZNSt14basic_[io]fstreamI[cw]St11char_traitsI[cw]EE4openERKNSt7__cxx1112basic_string*; + # std::locale::name() returning new std::string _ZNKSt6locale4nameB5cxx11Ev;
Re: [PATCH] PR59448 - Promote consume to acquire
On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. Andrew
Re: [PATCH][ARM] FreeBSD ARM support, EABI, v3
On Thu, Jan 8, 2015 at 8:51 PM, Andreas Tobler andreast-l...@fgznet.ch wrote: On 08.01.15 17:27, Richard Earnshaw wrote: On 29/12/14 18:44, Andreas Tobler wrote: All, here is the third attempt to support ARM with FreeBSD. In the meantime we found another issue in the unwinder where I had to adapt some stuff. The unwind_phase2_forced function in libgcc calls a stop_fn function. This stop_fn is in FreeBSD's libthr implementation and is called thread_unwind_stop. This thread_unwind_stop is a generic function used on all FreeBSD archs. The issue is now that this thread_unwind_stop expects a double int for the exception_class, like on every other arch. For ARM EABI this exception_class is an array of char which is passed in one register as pointer vs. two registers for a double int. To solve this issue we defined the exception_class as double integer for FreeBSD. My apologies for the slow response, some other work and then holidays intervened. From my understanding of the ABI document the implementation is currently as mandated by the ABI. Also this isn't a part of the ABI that's available for the platform (here FreeBSD to manipulate and change as per it's wishes). ARM EHABI is special for software, making FreeBSD more special for ARM appears to be counter intuitive from my point of view. A number of exception unwinding libraries. for e.g. libobjc , libstdc++ all use this implementation of exception_class. Therefore this creates a divergence for the FreeBSD port which is different from everything else. I expect that a number of language run time support libraries that supported the ARM EHABI would be using such an implementation, therefore you need to fix every single implementation of this in every unwinder that supports the ARM EHABI which I expect to have been ported to in a number of libraries already. 
(I already see this in libobjc and libstdc++ in the GCC tree) I would rather fix the thread_unwind_stop implementation in libthr for ARM EHABI rather than make this change. This adaptation reduced the failure count in libstdc++ by about 40 fails. I build and test this port on a regular basis and I post the results to the usual place. Thanks for doing this. I'm really glad that FreeBSD is finally moving to EABI. regards Ramana ... Umm, sorry, just seen this update to the previous patch. The changes to the exception unwinding look a bit more involved. Could you separate that out into a separate patch, so that it's easier to see what you're changing? Ok, here the mentioned part as separate diff. The comments are above. The CL below :) Thank you very much! Andreas gcc: * ginclude/unwind-arm-common.h (_Uwind_Control_Block): Define exception_class as double integer for FreeBSD ARM. (_Unwind_Exception): Define _Unwind_Exception_Class as double integer for FreeBSD ARM. libstc++-v3: * libsupc++/unwind-cxx.h (__is_gxx_exception_class, __is_dependent_exception): Exclude FreeBSD ARM from the __ARM_EABI_UNWINDER__ ifdef.
Re: [PATCH] Fix PR64461, Incorrect code on coldfire targets
Jeff Law l...@redhat.com writes: For fun I've got an m68k bootstrap of the trunk running. I don't expect it to finish for at least a week or so, assuming it runs to completion. The last time I did that it took about 10 days (with all languages enabled, running in Aranym on a moderately fast host). Andreas. -- Andreas Schwab, SUSE Labs, sch...@suse.de GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7 And now for something completely different.
Re: flatten expr.h (version 2)
On 13 January 2015 at 15:34, Richard Biener rguent...@suse.de wrote: On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: Hi, This is a revamped expr.h flattening flattening patch rebased on tree.h and tree-core.h flattening patch (r219402). It depends upon the following patch to get committed. https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html Changes: * Removed all includes except tree-core.h. Put includes required by expr.h in a comment. * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, genoutput.c. * Did not put includes in gcc-plugin.h since expr.h cannot be included by plugins (putting them broke building a file in c-family/ since expr.h is not allowed in front-ends) * Affects java front-end (expr.h is allowed in java front-end). Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: all,go,ada,jit Built on all targets in config-list.mk with languages: all, go. OK to commit ? diff --git a/gcc/expr.c b/gcc/expr.c index fc22862..824541e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see #include regs.h #include hard-reg-set.h #include except.h -#include input.h #include function.h #include insn-config.h #include insn-attr.h /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ +#include hashtab.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#include statistics.h +#include real.h +#include fixed-value.h #include expr.h Please move the comment to the proper place ah, my flattening tool doesn't look at comments. I will move the comment before expr.h include, thanks. diff --git a/gcc/expr.h b/gcc/expr.h index a7638b8..f1be8dc 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. 
If not see #ifndef GCC_EXPR_H #define GCC_EXPR_H -/* For inhibit_defer_pop */ +/* expr.h required includes */ +#if 0 #include hashtab.h #include hash-set.h #include vec.h @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. If not see #include hard-reg-set.h #include input.h #include function.h -/* For XEXP, GEN_INT, rtx_code */ #include rtl.h -/* For optimize_size */ #include flags.h -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ #include tree-core.h -/* For GET_MODE_BITSIZE, word_mode */ #include insn-config.h +#include alias.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#endif Err, please remove the #if 0 section I kept it because if something breaks later (hopefully not!), it will be easier to fix. I will remove it. + +#include tree-core.h Why? The original comment says -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ but all those are declared in tree.h. Which means the files including expr.h must already include tree.h. If that's not the reason we need to include tree-core.h from expr.c please add a comment explaining why. bt-load.c fails to compile because it includes expr.h but does not include tree.h I will place tree.h include in all files that include expr.h and rebuild. -/* Definitions from emit-rtl.c */ -#include emit-rtl.h - /* Return a memory reference like MEMREF, but with its mode widened to MODE and adjusted by OFFSET. */ extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); err - functions defined in emit-rtl.c should be declared in emit-rtl.h. Please fix that first. expr.h should _only_ contain prototypes for stuff defined in expr.c. oops, missed it :( Andrew did a good job with this, first cleaning up a header moving declarations to proper places and only after that flattening it. 
The rest of the patch looks good to me but expr.h isn't in a good shape after it. I will work on it and send patch with suggested changes by tomorrow. Thanks, Prathamesh Thanks, Richard.
Re: [Fortran, Patch] PR60334 - Segmentation fault on character pointer assignments
Hi Paul, thanks for the reviewed and the valued comments. Just for completeness I have attached the patch with the changes requested. Bootstraps and regtests ok on x86_64-linux-gnu. Regards, Andre On Mon, 12 Jan 2015 22:07:29 +0100 Paul Richard Thomas paul.richard.tho...@gmail.com wrote: Hi Andre, + if (INDIRECT_REF_P (parmse.string_length)) +/* In chains of functions/procedure calls the string_length already + is a pointer to the variable holding the length. Therefore + remove the deref on call. */ +parmse.string_length = TREE_OPERAND (parmse.string_length, 0); This is OK but I would use instead: + if (POINTER_TYPE_P (parmse.string_length)) +/* In chains of functions/procedure calls the string_length already + is a pointer to the variable holding the length. Therefore + remove the deref on call. */ +parmse.string_length = build_fold_indirect_ref (parmse.string_length); If you look in ~/gcc/fold-const.c:15751, you will see that TREE_OPERAND (parmse.string_length, 0) but that it is preceded by cleaning up of NOOPS and, in any case, its usage will preserve the standard API just in case the internals change :-) of course, using TREE_OPERAND (xxx, 0) in the various fortran class functions makes such an assumption ;-) Apart from that, the patch is fine. I'll have a session of doing some commits later this week and will do this patch at that time. Cheers Paul On 11 January 2015 at 16:21, Andre Vehreschild ve...@gmx.de wrote: Hi Paul, thanks for the review. I do not have commits rights. Unfortunately is the patch not ok. I figured today, that it needs an extension when function calls that return deferred char len arrays are nested. In this special case the string length would have been lost. The attached extended version fixes this issue. Sorry for the duplicate work. Bootstraps and regtests ok on x86_64-linux-gnu. Regards, Andre On Sun, 11 Jan 2015 16:11:10 +0100 Paul Richard Thomas paul.richard.tho...@gmail.com wrote: Dear Andre, This is OK for trunk. 
I have not been keeping track of whether or not you have commit rights yet. If not, I will get to it sometime this week. Thanks for the patch. Paul On 10 January 2015 at 15:59, Andre Vehreschild ve...@gmx.de wrote: Hi all, attached patch fixes the bug reported in pr 60334. The issue here was that the function's result being (a pointer to) a deferred length char array. The string length for the result value was wrapped in a local variable, whose value was never written back to the string length of the result. This lead the calling routine to take the length of the result to be random leading to a crash. This patch addresses the issue by preventing the instantiation of the local var and instead using a reference to the parameter. This not only saves one value on the stack, but also because for small functions the compiler will hold all parameters in registers for a significant level of optimization, all the overhead of memory access (I hope :-). Bootstraps and regtests ok on x86_64-linux-gnu. - Andre -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de -- Andre Vehreschild * Kreuzherrenstr. 
8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de pr60334_3.clog Description: Binary data diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 1e74125..86873f7 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1333,12 +1333,30 @@ gfc_get_symbol_decl (gfc_symbol * sym) (sym-ts.u.cl-passed_length == sym-ts.u.cl-backend_decl)) sym-ts.u.cl-backend_decl = NULL_TREE; - if (sym-ts.deferred fun_or_res - sym-ts.u.cl-passed_length == NULL - sym-ts.u.cl-backend_decl) + if (sym-ts.deferred byref) { - sym-ts.u.cl-passed_length = sym-ts.u.cl-backend_decl; - sym-ts.u.cl-backend_decl = NULL_TREE; + /* The string length of a deferred char array is stored in the + parameter at sym-ts.u.cl-backend_decl as a reference and + marked as a result. Exempt this variable from generating a + temporary for it. */ + if (sym-attr.result) + { + /* We need to insert a indirect ref for param decls. */ + if (sym-ts.u.cl-backend_decl + TREE_CODE (sym-ts.u.cl-backend_decl) == PARM_DECL) + sym-ts.u.cl-backend_decl = + build_fold_indirect_ref (sym-ts.u.cl-backend_decl); + } + /* For all other parameters make sure, that they are copied so + that the value and any modifications are local to the routine + by generating a temporary variable. */ + else if (sym-attr.function + sym-ts.u.cl-passed_length == NULL +
Re: shift/extract SHIFT_COUNT_TRUNCATED combine bug
On Mon, Jan 12, 2015 at 11:12 PM, Jeff Law l...@redhat.com wrote: On 04/08/14 14:07, Mike Stump wrote: Something broke in the compiler to cause combine to incorrectly optimize: (insn 12 11 13 3 (set (reg:SI 604 [ D.6102 ]) (lshiftrt:SI (subreg/s/u:SI (reg/v:DI 601 [ x ]) 0) (reg:SI 602 [ D.6103 ]))) t.c:47 4436 {lshrsi3} (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) (nil))) (insn 13 12 14 3 (set (reg:SI 605) (and:SI (reg:SI 604 [ D.6102 ]) (const_int 1 [0x1]))) t.c:47 3658 {andsi3} (expr_list:REG_DEAD (reg:SI 604 [ D.6102 ]) (nil))) (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) (zero_extend:DI (reg:SI 605))) t.c:47 4616 {zero_extendsidi2} (expr_list:REG_DEAD (reg:SI 605) (nil))) into: (insn 11 10 12 3 (set (reg:SI 602 [ D.6103 ]) (not:SI (subreg:SI (reg:DI 595 [ D.6102 ]) 0))) t.c:47 3732 {one_cmplsi2} (expr_list:REG_DEAD (reg:DI 595 [ D.6102 ]) (nil))) (note 12 11 13 3 NOTE_INSN_DELETED) (note 13 12 14 3 NOTE_INSN_DELETED) (insn 14 13 15 3 (set (reg:DI 599 [ D.6102 ]) (zero_extract:DI (reg/v:DI 601 [ x ]) (const_int 1 [0x1]) (reg:SI 602 [ D.6103 ]))) t.c:47 4668 {c2_extzvdi} (expr_list:REG_DEAD (reg:SI 602 [ D.6103 ]) (nil))) This shows up in: FAIL: gcc.c-torture/execute/builtin-bitops-1.c execution, -Og -g for me. diff --git a/gcc/combine.c b/gcc/combine.c index 708691f..c1f50ff 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7245,6 +7245,18 @@ make_extraction (enum machine_mode mode, rtx inner, HOST_WIDE_INT pos, extraction_mode = insn.field_mode; } + /* On a SHIFT_COUNT_TRUNCATED machine, we can't promote the mode of + the extract to a larger size on a variable extract, as previously + the position might have been optimized to change a bit of the + index of the starting bit that would have been ignored before, + but, with a larger mode, will then not be. If we wanted to do + this, we'd have to mask out those bits or prove that those bits + are 0. 
*/ + if (SHIFT_COUNT_TRUNCATED + pos_rtx + GET_MODE_BITSIZE (extraction_mode) GET_MODE_BITSIZE (mode)) +extraction_mode = mode; + /* Never narrow an object, since that might not be safe. */ if (mode != VOIDmode is sufficient to never widen variable extracts on SHIFT_COUNT_TRUNCATED machines. So, the question is, how did people expect this to work? I didn’t spot what changed recently to cause the bad code-gen. The optimization of sub into not is ok, despite how funny it looks, because is feeds into extract which we know by SHIFT_COUNT_TRUNCATED is safe. Is the patch a reasonable way to fix this? On a SHIFT_COUNT_TRUNCATED target, I don't think it's ever OK to widen a shift, variable or constant. In the case of a variable shift, we could easily have eliminated the masking code before or during combine. For a constant shift amount we could have adjusted the constant (see SHIFT_COUNT_TRUNCATED in cse.c) I think it's just an oversight and it has simply never bit us before. IMHO SHIFT_COUNT_TRUNCATED should be removed and instead backends should provide shift patterns with a (and:QI ...) for the shift amount which simply will omit that operation if suitable. Richard. jeff
Re: [patch, arm] Minor optimization on thumb2 tail call
On 19/11/14 02:43, Joey Ye wrote: Current thumb2 -Os generates suboptimal code for following tail call case: int f4(int b, int a, int c, int d); int g(int a, int b, int c, int d) { return f4(b, a, c, d); } arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-m3 test.c push {r4, lr} mov r4, r1 mov r1, r0 mov r0, r4 pop {r4, lr} b f4 There are two issues: The first one is that saving/restoring lr is not necessary, as there is no return via pop pc. The second one is that even if we managed to avoid lr push/pop, ldmia.w sp!, {r4} is still emitted as there is a missing pattern for pop single and code size is not optimal. This patch fixes these two issues and introduces a shared test case. CSiBE thumb2 -Os shows cross board code size reduction, except for one case with 4 bytes regression. The case is like: void f () { if () ... else if () ... else g(); } There are N=2 non-sibcall returns and S=1 sibcall return. Originally the non-sibcall returns are just pop {r4, r5, pc}, now they become b.n .Lreturn .Lreturn: pop {r4, r5} bx lr The one byte save from sibcall return does not win the non-sibcall return regressions back. In general scenario, number of N non-sibcall returns use b.n branching to merged tail, number of S sibcalls save 2 bytes by avoid poping lr. It results in 4-2*S bytes regression. In the worst scenario, each non-sibcall return has to use b.w branching to merged tail, resulting in (N-S)*2 bytes regression. The worst scenario is rare, according to CSiBE. The general regression scenario can only regress 2 bytes at most. So I would not introduce additional complexity to handle the regression case. Make check cortex-m3: pass thumb2 bootstrap (O2/Os): pass * config/arm/arm.c (arm_compute_save_reg_mask): Do not save lr in case of tail call. * config/arm/thumb2.md (*thumb2_pop_single): New pattern. * gcc.target/arm/thumb2-pop-single.c: New test. 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 4f04707..20d0b9e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19190,6 +19190,7 @@ arm_compute_save_reg_mask (void) || (save_reg_mask optimize_size ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + !crtl-tail_call_emit !crtl-calls_eh_return)) save_reg_mask |= 1 LR_REGNUM; diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 64acfea..29cfb17 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -267,6 +267,17 @@ (set_attr type multiple)] ) +;; Pop a single register as its size is preferred over a post-incremental load +(define_insn *thumb2_pop_single + [(set (match_operand:SI 0 low_register_operand =r) +(mem:SI (post_inc:SI (reg:SI SP_REGNUM] + TARGET_THUMB2 (reload_in_progress || reload_completed) + pop\t{%0} + [(set_attr type load1) + (set_attr length 2) + (set_attr predicable yes)] +) + ;; We have two alternatives here for memory loads (and similarly for stores) ;; to reflect the fact that the permissible constant pool ranges differ ;; between ldr instructions taking low regs and ldr instructions taking high This is OK thanks. Please CC me on ARM specific patches, this one somehow seems to have missed my filters. Ramana
Re: flatten expr.h (version 2)
On Sun, 11 Jan 2015, Prathamesh Kulkarni wrote: Hi, This is a revamped expr.h flattening flattening patch rebased on tree.h and tree-core.h flattening patch (r219402). It depends upon the following patch to get committed. https://gcc.gnu.org/ml/gcc-patches/2015-01/msg00565.html Changes: * Removed all includes except tree-core.h. Put includes required by expr.h in a comment. * Moved stmt.c, expmed.c prototypes to stmt.h, expmed.h respectively. * Adjusted generator programs: genemit.c, gengtype.c, genopinit.c, genoutput.c. * Did not put includes in gcc-plugin.h since expr.h cannot be included by plugins (putting them broke building a file in c-family/ since expr.h is not allowed in front-ends) * Affects java front-end (expr.h is allowed in java front-end). Bootstrapped and tested on x86_64-unknown-linux-gnu with languages: all,go,ada,jit Built on all targets in config-list.mk with languages: all, go. OK to commit ? diff --git a/gcc/expr.c b/gcc/expr.c index fc22862..824541e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -41,11 +41,17 @@ along with GCC; see the file COPYING3. If not see #include regs.h #include hard-reg-set.h #include except.h -#include input.h #include function.h #include insn-config.h #include insn-attr.h /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ +#include hashtab.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#include statistics.h +#include real.h +#include fixed-value.h #include expr.h Please move the comment to the proper place diff --git a/gcc/expr.h b/gcc/expr.h index a7638b8..f1be8dc 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -20,7 +20,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_EXPR_H #define GCC_EXPR_H -/* For inhibit_defer_pop */ +/* expr.h required includes */ +#if 0 #include hashtab.h #include hash-set.h #include vec.h @@ -29,15 +30,17 @@ along with GCC; see the file COPYING3. 
If not see #include hard-reg-set.h #include input.h #include function.h -/* For XEXP, GEN_INT, rtx_code */ #include rtl.h -/* For optimize_size */ #include flags.h -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ #include tree-core.h -/* For GET_MODE_BITSIZE, word_mode */ #include insn-config.h +#include alias.h +#include emit-rtl.h +#include expmed.h +#include stmt.h +#endif Err, please remove the #if 0 section + +#include tree-core.h Why? The original comment says -/* For tree_fits_[su]hwi_p, tree_to_[su]hwi, fold_convert, size_binop, - ssize_int, TREE_CODE, TYPE_SIZE, int_size_in_bytes,*/ but all those are declared in tree.h. Which means the files including expr.h must already include tree.h. If that's not the reason we need to include tree-core.h from expr.c please add a comment explaining why. -/* Definitions from emit-rtl.c */ -#include emit-rtl.h - /* Return a memory reference like MEMREF, but with its mode widened to MODE and adjusted by OFFSET. */ extern rtx widen_memory_access (rtx, machine_mode, HOST_WIDE_INT); err - functions defined in emit-rtl.c should be declared in emit-rtl.h. Please fix that first. expr.h should _only_ contain prototypes for stuff defined in expr.c. Andrew did a good job with this, first cleaning up a header moving declarations to proper places and only after that flattening it. The rest of the patch looks good to me but expr.h isn't in a good shape after it. Thanks, Richard.
Re: [Patch, AArch64, Testsuite] Check for expected MOVI vectorization.
On 9 January 2015 at 16:31, Tejas Belagod tejas.bela...@arm.com wrote: gcc/testsuite: * gcc.target/aarch64/vect-movi.c: Check for vectorization for 64-bit and 128-bit. OK /Marcus
Re: [gomp4] Replace enum omp_clause_map_kind with enum gomp_map_kind (was: Including a file from include/ in gcc/*.h)
Hi! On Mon, 12 Jan 2015 17:39:16 +0100, Jakub Jelinek ja...@redhat.com wrote: On Mon, Jan 12, 2015 at 05:32:14PM +0100, Thomas Schwinge wrote: I have now committed the patch to gomp-4_0-branch in the following form. The issues raised above remain to be resolved. (I'll try to address those later on.) In spirit against the tree.h header flattening, I had to keep the #include include/gomp-constants.h in gcc/tree-core.h, because otherwise I'd have to add it to a ton of *.c files, just for the enum gomp_map_kind definition. I found that in the C++ dialect used by GCC, it is not possible to declare an enum without giving the list of enumerators. N2764 (from 2008) resolved this by adding appropriate syntax for declaring enums, however: warning: scoped enums only available with -std=c++11 or -std=gnu++11. If it were possible to use this, we could add to gcc/tree-core.h: enum gomp_map_kind : char; ... (or similar), and this way decouple the declaration (gcc/tree-core.h) From the actual population of it (include/gomp-constants.h). Alternatively, in gcc/tree-core.h:struct tree_omp_clause, we could switch the map_kind member from enum gomp_map_kind to a char -- but that would defeat the usage of an enum (easy pretty-printing of its enumerators in GDB, and so on.). Or just don't do this and duplicate the constants and just assert somewhere (in omp-low.c) at compile time that all the values match. Either using char and casting the value only in the OMP_* macros or duplicating the values sound preferrable over including include/gomp-constants.h from tree-core.h. Indeed I've found precedent in gcc/tree.h: there already are a few *_SET_* functions, also used for casting to/from enum types. Committed to gomp-4_0-branch in r219524: commit 7dbb7ec6c08d604926fca30e105d2b6411cf73cb Author: tschwinge tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4 Date: Tue Jan 13 10:50:01 2015 + Avoid inclusion of gomp-constants.h in gcc/tree-core.h. 
N2764 (from 2008) added syntax for declaring enums, however: warning: scoped enums only available with -std=c++11 or -std=gnu++11: in the C++ dialect currently used by GCC, it is not possible to declare an enum without giving the full list of enumerators. If it were possible to use this, we could add to gcc/tree-core.h: enum gomp_map_kind : unsigned char; ..., and keep using enum gomp_map_kind for gcc/tree-core.h's struct tree_omp_clause's map_kind member, and this way decouple the declaration (gcc/tree-core.h) from the actual population of it (include/gomp-constants.h). Until switching GCC to C++11, we'll have to do as follows: gcc/ * tree-core.h: Don't include gomp-constants.h. (struct tree_omp_clause): Change type of map_kind member from enum gomp_map_kind to unsigned char. * tree.h (OMP_CLAUSE_MAP_KIND): Cast it to enum gomp_map_kind. (OMP_CLAUSE_SET_MAP_KIND): New macro. * gimplify.c (gimplify_adjust_omp_clauses_1) (gimplify_adjust_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. * omp-low.c (oacc_initialize_reduction_data): Likewise. * tree-nested.c (convert_nonlocal_reference_stmt) (convert_local_reference_stmt, convert_gimple_call): Likewise. * tree-streamer-in.c (unpack_ts_omp_clause_value_fields): Likewise. gcc/c/ * c-parser.c (c_parser_oacc_data_clause) (c_parser_oacc_data_clause_deviceptr, c_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * c-typeck.c (handle_omp_array_sections): Likewise. gcc/cp/ * parser.c (cp_parser_oacc_data_clause) (cp_parser_oacc_data_clause_deviceptr, cp_parser_omp_clause_map): Use OMP_CLAUSE_SET_MAP_KIND. * semantics.c (handle_omp_array_sections): Likewise. gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause, gfc_trans_omp_clauses): Use OMP_CLAUSE_SET_MAP_KIND. gcc/ * lto-streamer-out.c: Include gomp-constants.h. * tree-streamer-in.c: Likewise. * tree-streamer-out.c: Likewise. gcc/lto/ * lto.c: Include gomp-constants.h. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219524 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 19 +++ gcc/c/ChangeLog.gomp | 7 +++ gcc/c/c-parser.c | 6 +++--- gcc/c/c-typeck.c | 2 +- gcc/cp/ChangeLog.gomp | 7 +++ gcc/cp/parser.c| 6 +++--- gcc/cp/semantics.c | 4 ++-- gcc/fortran/trans-openmp.c | 46 +++--- gcc/gimplify.c | 11 ++- gcc/lto-streamer-out.c | 1 + gcc/lto/ChangeLog.gomp | 4 gcc/lto/lto.c | 1 + gcc/omp-low.c | 2 +- gcc/tree-core.h| 5 ++--- gcc/tree-nested.c |
Re: [PATCH] Fix PR64436: broken logic to process bitwise ORs in bswap pass
On Mon, 12 Jan 2015, Thomas Preud'homme wrote: Hi all, To identify if a set of loads, shift, cast, mask (bitwise and) and bitwise OR is equivalent to a load or byteswap, the bswap pass assigns a number to each byte loaded according to its significance (1 for lsb, 2 for next least significant byte, etc.) and forms a symbolic number such as 0x04030201 for a 32bit load. When processing a bitwise OR of two such symbolic numbers, it is necessary to consider the lowest and highest addresses where a byte was loaded to renumber each byte accordingly. For instance if the two numbers are 0x04030201 and they were loaded from consecutive words in memory the result would be 0x0807060504030201 but if they overlap fully the result would be 0x04030201. Currently the computation of the byte with highest address is broken: it takes the byte with highest address of the symbolic number that starts last. That is, if one number represents an 8bit load at address 0x14 and another number represents a 32bit load at address 0x12 it will compute the end as 0x14 instead of 0x15. This error affects the computation of the size of the load for all targets and the computation of the symbolic number that results from the bitwise OR for big endian targets. This is what causes PR64436 due to a change in the gimple generated for that testcase. ChangeLog entry is as follows: Ok. Thanks, Richard. gcc/ChangeLog 2014-12-30 Thomas Preud'homme thomas.preudho...@arm.com PR tree-optimization/64436 * tree-ssa-math-opts.c (find_bswap_or_nop_1): Move code performing the merge of two symbolic numbers for a bitwise OR to ... (perform_symbolic_merge): This. Also fix computation of the range and end of the symbolic number corresponding to the result of a bitwise OR. 
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 1ed2838..286183a 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1816,6 +1816,123 @@ find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n) return true; } +/* Compute the symbolic number N representing the result of a bitwise OR on 2 + symbolic number N1 and N2 whose source statements are respectively + SOURCE_STMT1 and SOURCE_STMT2. */ + +static gimple +perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1, + gimple source_stmt2, struct symbolic_number *n2, + struct symbolic_number *n) +{ + int i, size; + uint64_t mask; + gimple source_stmt; + struct symbolic_number *n_start; + + /* Sources are different, cancel bswap if they are not memory location with + the same base (array, structure, ...). */ + if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2)) +{ + int64_t inc; + HOST_WIDE_INT start_sub, end_sub, end1, end2, end; + struct symbolic_number *toinc_n_ptr, *n_end; + + if (!n1-base_addr || !n2-base_addr + || !operand_equal_p (n1-base_addr, n2-base_addr, 0)) + return NULL; + + if (!n1-offset != !n2-offset || + (n1-offset !operand_equal_p (n1-offset, n2-offset, 0))) + return NULL; + + if (n1-bytepos n2-bytepos) + { + n_start = n1; + start_sub = n2-bytepos - n1-bytepos; + source_stmt = source_stmt1; + } + else + { + n_start = n2; + start_sub = n1-bytepos - n2-bytepos; + source_stmt = source_stmt2; + } + + /* Find the highest address at which a load is performed and + compute related info. */ + end1 = n1-bytepos + (n1-range - 1); + end2 = n2-bytepos + (n2-range - 1); + if (end1 end2) + { + end = end2; + end_sub = end2 - end1; + } + else + { + end = end1; + end_sub = end1 - end2; + } + n_end = (end2 end1) ? n2 : n1; + + /* Find symbolic number whose lsb is the most significant. */ + if (BYTES_BIG_ENDIAN) + toinc_n_ptr = (n_end == n1) ? n2 : n1; + else + toinc_n_ptr = (n_start == n1) ? 
n2 : n1; + + n-range = end - n_start-bytepos + 1; + + /* Check that the range of memory covered can be represented by + a symbolic number. */ + if (n-range 64 / BITS_PER_MARKER) + return NULL; + + /* Reinterpret byte marks in symbolic number holding the value of + bigger weight according to target endianness. */ + inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; + size = TYPE_PRECISION (n1-type) / BITS_PER_UNIT; + for (i = 0; i size; i++, inc = BITS_PER_MARKER) + { + unsigned marker = + (toinc_n_ptr-n (i * BITS_PER_MARKER)) MARKER_MASK; + if (marker marker != MARKER_BYTE_UNKNOWN) + toinc_n_ptr-n += inc; + } +} + else +{ + n-range = n1-range; + n_start = n1; + source_stmt = source_stmt1; +} + + if
[AArch64] Allow stack pointer as first input to a subtraction
Several sub-based patterns allowed the stack pointer to be the destination but not the first source. This looked like an oversight; in all the patterns changed here (but not for example in *sub_mul_imm_mode), the instruction allows the stack pointer to appear in both positions. Tested on aarch64-linux-gnu. OK to install? Thanks, Richard gcc/ * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) (*sub_optabALLX:mode_GPI:mode, *sub_optabSHORT:mode_si_uxtw) (*sub_optabALLX:mode_shft_GPI:mode) (*sub_optabSHORT:mode_shft_si_uxtw, *sub_optabmode_multp2) (*sub_optabsi_multp2_uxtw, *sub_uxtmode_multp2) (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. gcc/testsuite/ * gcc.target/aarch64/subsp.c: New test. Index: gcc/config/aarch64/aarch64.md === --- gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.901649982 + +++ gcc/config/aarch64/aarch64.md 2015-01-13 09:48:26.897650031 + @@ -1889,8 +1889,8 @@ (define_insn *add_uxtsi_multp2_uxtw (define_insn subsi3 [(set (match_operand:SI 0 register_operand =rk) - (minus:SI (match_operand:SI 1 register_operand r) - (match_operand:SI 2 register_operand r)))] + (minus:SI (match_operand:SI 1 register_operand rk) + (match_operand:SI 2 register_operand r)))] sub\\t%w0, %w1, %w2 [(set_attr type alu_sreg)] @@ -1900,7 +1900,7 @@ (define_insn subsi3 (define_insn *subsi3_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (match_operand:SI 2 register_operand r] sub\\t%w0, %w1, %w2 @@ -1909,8 +1909,8 @@ (define_insn *subsi3_uxtw (define_insn subdi3 [(set (match_operand:DI 0 register_operand =rk,w) - (minus:DI (match_operand:DI 1 register_operand r,w) - (match_operand:DI 2 register_operand r,w)))] + (minus:DI (match_operand:DI 1 register_operand rk,w) + (match_operand:DI 2 register_operand r,w)))] @ sub\\t%x0, %x1, %x2 @@ -2013,7 +2013,7 @@ (define_insn *sub_mul_imm_si_uxtw (define_insn *sub_optabALLX:mode_GPI:mode [(set 
(match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 1 register_operand r) + (minus:GPI (match_operand:GPI 1 register_operand rk) (ANY_EXTEND:GPI (match_operand:ALLX 2 register_operand r] @@ -2025,7 +2025,7 @@ (define_insn *sub_optabALLX:mode_G (define_insn *sub_optabSHORT:mode_si_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (ANY_EXTEND:SI (match_operand:SHORT 2 register_operand r)] @@ -2035,7 +2035,7 @@ (define_insn *sub_optabSHORT:mode_s (define_insn *sub_optabALLX:mode_shft_GPI:mode [(set (match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 1 register_operand r) + (minus:GPI (match_operand:GPI 1 register_operand rk) (ashift:GPI (ANY_EXTEND:GPI (match_operand:ALLX 2 register_operand r)) (match_operand 3 aarch64_imm3 Ui3] @@ -2048,7 +2048,7 @@ (define_insn *sub_optabALLX:mode_sh (define_insn *sub_optabSHORT:mode_shft_si_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 1 register_operand r) + (minus:SI (match_operand:SI 1 register_operand rk) (ashift:SI (ANY_EXTEND:SI (match_operand:SHORT 2 register_operand r)) (match_operand 3 aarch64_imm3 Ui3)] @@ -2059,7 +2059,7 @@ (define_insn *sub_optabSHORT:mode_s (define_insn *sub_optabmode_multp2 [(set (match_operand:GPI 0 register_operand =rk) - (minus:GPI (match_operand:GPI 4 register_operand r) + (minus:GPI (match_operand:GPI 4 register_operand rk) (ANY_EXTRACT:GPI (mult:GPI (match_operand:GPI 1 register_operand r) (match_operand 2 aarch64_pwr_imm3 Up3)) @@ -2074,7 +2074,7 @@ (define_insn *sub_optabmode_multp2 (define_insn *sub_optabsi_multp2_uxtw [(set (match_operand:DI 0 register_operand =rk) (zero_extend:DI - (minus:SI (match_operand:SI 4 register_operand r) + (minus:SI (match_operand:SI 4 register_operand rk) (ANY_EXTRACT:SI (mult:SI (match_operand:SI 1 register_operand r) (match_operand 2 
aarch64_pwr_imm3 Up3)) @@ -2113,7 +2113,7 @@ (define_insn *subsi3_carryin_uxtw (define_insn *sub_uxtmode_multp2 [(set
Re: [AARCH64][PR63424][4.9]Backport Fix PR63424 by adding sumaxminv2di3 pattern
On 7 January 2015 at 14:01, Renlin Li renlin...@arm.com wrote: Is it Okay for branch 4.9? gcc/ChangeLog: 2014-11-19 Renlin Li renlin...@arm.com PR target/63424 * config/aarch64/aarch64-simd.md (sumaxminv2di3): New. gcc/testsuite/ChangeLog: 2014-11-19 Renlin Li renlin...@arm.com PR target/63424 * gcc.target/aarch64/pr63424.c: New Test. OK /Marcus
[testsuite] PATCH: Check if -pg available
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? Thanks. H.J. --- gcc/testsuite/lib/target-supports.exp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ac9646..7c09399 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { set profiling_available_saved 0 } else { set profiling_available_saved 1 + if { [check_no_compiler_messages_nocache profiling executable { + int main() { return 0; } } -pg] } { + set profiling_available_saved 1 +} else { + set profiling_available_saved 0 + } } } -- 1.9.3
Re: [Fortran, Patch] Cosmetics
Hi, is this patch committed now? I don't have the rights to do so myself. - Andre On Sun, 28 Dec 2014 17:17:50 +0100 FX fxcoud...@gmail.com wrote: 2014-12-28 Andre Vehreschild ve...@gmx.de * trans-decl.c (gfc_finish_var_decl): Fixed displaced comment. * trans-stmt.c (gfc_trans_allocate): Fixed indentation. OK to commit. Thanks! FX -- Andre Vehreschild * Kreuzherrenstr. 8 * 52062 Aachen Tel.: +49 241 9291018 * Email: ve...@gmx.de
Re: [PATCH] Fix PR64415
On Tue, 13 Jan 2015, Jakub Jelinek wrote: On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. I actually think it makes sense to disable -fvar-tracking-assignments just for specific function, e.g. when it is known to be too expensive on some large function you don't care about debug info quality too much, while you still don't want to disable it on the whole TU level, because you have other functions (e.g. small ones) you still want to be able to debug often with good coverage. So if this is fixable in the inliner and/or LTO in-streamer that would be my preference. The following seems to work (for the testcase). Testing coverage of this mode will of course be bad. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * tree-inline.c (insert_debug_decl_map): Check destination function MAY_HAVE_DEBUG_STMTS. (insert_init_debug_bind): Likewise. (insert_init_stmt): Remove redundant check. (remap_gimple_stmt): Drop debug stmts if the destination function has var-tracking assignments disabled. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. 
Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options -g } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +} Index: gcc/tree-inline.c === --- gcc/tree-inline.c (revision 219520) +++ gcc/tree-inline.c (working copy) @@ -192,7 +192,7 @@ insert_debug_decl_map (copy_body_data *i if (!gimple_in_ssa_p (id-src_cfun)) return; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) return; if (!target_for_debug_bind (key)) @@ -1348,6 +1348,10 @@ remap_gimple_stmt (gimple stmt, copy_bod bool skip_first = false; gimple_seq stmts = NULL; + if (is_gimple_debug (stmt) + !opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) +return stmts; + /* Begin by recognizing trees that we'll completely rewrite for the inlining context. Our output for these trees is completely different from out input (e.g. 
RETURN_EXPR is deleted, and morphs @@ -3007,7 +3011,7 @@ insert_init_debug_bind (copy_body_data * if (!gimple_in_ssa_p (id-src_cfun)) return NULL; - if (!MAY_HAVE_DEBUG_STMTS) + if (!opt_for_fn (id-dst_fn, flag_var_tracking_assignments)) return NULL; tracked_var = target_for_debug_bind (var); @@ -3063,7 +3067,7 @@ insert_init_stmt (copy_body_data *id, ba gsi_insert_after (si, init_stmt, GSI_NEW_STMT); gimple_regimplify_operands (init_stmt, si); - if (!is_gimple_debug (init_stmt) MAY_HAVE_DEBUG_STMTS) + if (!is_gimple_debug (init_stmt)) { tree def = gimple_assign_lhs (init_stmt); insert_init_debug_bind (id, bb, def, def, init_stmt);
Re: [AArch64] Allow stack pointer as first input to a subtraction
On 13 January 2015 at 10:47, Richard Sandiford richard.sandif...@arm.com wrote: Several sub-based patterns allowed the stack pointer to be the destination but not the first source. This looked like an oversight; in all the patterns changed here (but not for example in *sub_mul_imm_mode), the instruction allows the stack pointer to appear in both positions. Tested on aarch64-linux-gnu. OK to install? Thanks, Richard gcc/ * config/aarch64/aarch64.md (subsi3, *subsi3_uxtw, subdi3) (*sub_optabALLX:mode_GPI:mode, *sub_optabSHORT:mode_si_uxtw) (*sub_optabALLX:mode_shft_GPI:mode) (*sub_optabSHORT:mode_shft_si_uxtw, *sub_optabmode_multp2) (*sub_optabsi_multp2_uxtw, *sub_uxtmode_multp2) (*sub_uxtsi_multp2_uxtw): Add stack pointer sources. gcc/testsuite/ * gcc.target/aarch64/subsp.c: New test. OK /Marcus
Re: [PATCH/AARCH64] Correctly handle stores of zero in fusion_load_store
On 13 January 2015 at 04:48, Andrew Pinski pins...@gmail.com wrote: ChangeLog: * config/aarch64/aarch64.c (fusion_load_store): Check dest mode instead of src mode. * gcc.target/aarch64/store-pair-1.c: New testcase. OK, thanks /Marcus
Re: [PATCH]: New configure options that make the compiler use -fPIE and -pie as default option
On Mon, Jan 12, 2015 at 11:50:41PM +, Joseph Myers wrote: On Mon, 12 Jan 2015, H.J. Lu wrote: +if test x$enable_default_pie = xyes; then + AC_MSG_CHECKING(if $target supports default PIE) + enable_default_pie=no + case $target in +i?86*-*-linux* | x86_64*-*-linux*) + saved_LDFLAGS=$LDFLAGS + saved_CFLAGS=$CFLAGS + CFLAGS=$CFLAGS -fPIE + LDFLAGS=$LDFLAGS -fPIE -pie + AC_TRY_LINK(,,[enable_default_pie=yes],) + LDFLAGS=$saved_LDFLAGS + CFLAGS=$saved_CFLAGS + ;; +*) + ;; +esac There should not be any such hardcoding of targets here without concrete evidence that the targets for which this sets enable_default_pie=no really cannot support PIE. In particular, there is no reason at all for this to be architecture-specific; all GNU/Linux architectures should support PIE. I believe AC_TRY_LINK here will test for the host, whereas what you want to know is what's supported for the target (but it's not possible to run link tests for the target at this point; the compiler for the target hasn't even been built). So: just presume that if the user passes --enable-default-pie then they know what they are doing, and don't try to override their choice. diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index c9e3bf1..89fc305 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -1583,6 +1583,10 @@ not be built. Specify that the run-time libraries for stack smashing protection should not be built. +@item --enable-default-pie +Turn on @option{-fPIE} and @option{-pie} by default if supported. +Currently supported targets are i?86-*-linux* and x86-64-*-linux*. The if supported and target list can then be removed here. Here is the updated patch. To support --enable-default-pie, each target must update STARTFILE_SPEC to support PIE_SPEC and NO_PIE_SPEC. I can provide STARTFILE_SPEC patch if needed. Thanks. H.J. --- gcc/ 2015-01-12 Magnus Granberg zo...@gentoo.org H.J. Lu hongjiu...@intel.com * Makefile.in (COMPILER): Add @NO_PIE_CFLAGS@. (LINKER): Add @NO_PIE_FLAG@. 
(libgcc.mvars): Set NO_PIE_CFLAGS to -fno-PIE for --enable-default-pie. * common.opt (fPIE): Initialize to -1. (fpie): Likewise. (static): Add RejectNegative Negative(shared). (no-pie): New option. (pie): Replace Negative(shared) with Negative(no-pie). * configure.ac: Add --enable-default-pie. (NO_PIE_CFLAGS): New. Check if -fno-PIE works. AC_SUBST. (NO_PIE_FLAG): New. Check if -no-pie works. AC_SUBST. * defaults.h (DEFAULT_FLAG_PIE): New. Default PIE to -fPIE. * gcc.c (NO_PIE_SPEC): New. (PIE_SPEC): Likewise. (LD_PIE_SPEC): Likewise. (LINK_PIE_SPEC): Handle -no-pie. Use PIE_SPEC and LD_PIE_SPEC. * opts.c (DEFAULT_FLAG_PIE): New. Set to 0 if ENABLE_DEFAULT_PIE is undefined. (finish_options): Update opts-x_flag_pie if it is -1. * config/gnu-user.h (FVTABLE_VERIFY_SPEC): New. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. Use NO_PIE_SPEC and NO_PIE_SPEC if ENABLE_DEFAULT_PIE is defined. (GNU_USER_TARGET_STARTFILE_SPEC): Use FVTABLE_VERIFY_SPEC. * doc/install.texi: Document --enable-default-pie. * doc/invoke.texi: Document -no-pie. * config.in: Regenerated. * configure: Likewise. gcc/ada/ 2015-01-12 H.J. Lu hongjiu...@intel.com * gcc-interface/Makefile.in (TOOLS_LIBS): Add @NO_PIE_FLAG@. libgcc/ 2015-01-12 H.J. Lu hongjiu...@intel.com * Makefile.in (CRTSTUFF_CFLAGS): Add $(NO_PIE_CFLAGS). diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5f9261f..180751f 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -252,6 +252,12 @@ LINKER = $(CC) LINKER_FLAGS = $(CFLAGS) endif +# We don't want to compile the compiler with -fPIE, it make PCH fail. +COMPILER += @NO_PIE_CFLAGS@ + +# Link with -no-pie since we compile the compiler with -fno-PIE. +LINKER += @NO_PIE_FLAG@ + # Like LINKER, but use a mutex for serializing front end links. 
ifeq (@DO_LINK_MUTEX@,true) LLINKER = $(SHELL) $(srcdir)/lock-and-run.sh linkfe.lck $(LINKER) @@ -1854,6 +1860,12 @@ libgcc.mvars: config.status Makefile specs xgcc$(exeext) echo GCC_CFLAGS = '$(GCC_CFLAGS)' tmp-libgcc.mvars echo INHIBIT_LIBC_CFLAGS = '$(INHIBIT_LIBC_CFLAGS)' tmp-libgcc.mvars echo TARGET_SYSTEM_ROOT = '$(TARGET_SYSTEM_ROOT)' tmp-libgcc.mvars + if test @enable_default_pie@ = yes; then \ + NO_PIE_CFLAGS=-fno-PIE; \ + else \ + NO_PIE_CFLAGS=; \ + fi; \ + echo NO_PIE_CFLAGS = $$NO_PIE_CFLAGS tmp-libgcc.mvars mv tmp-libgcc.mvars libgcc.mvars diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in index 870cfab..a446d48 100644 --- a/gcc/ada/gcc-interface/Makefile.in
Re: [testsuite] PATCH: Check if -pg available
On Tue, Jan 13, 2015 at 04:54:32AM -0800, H.J. Lu wrote: On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? Here is a patch to check if -pg is available. If -pg doesn't link, profiling isn't available. OK for trunk? Thanks. H.J. --- gcc/testsuite/lib/target-supports.exp | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0ac9646..7c09399 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -546,6 +546,12 @@ proc check_profiling_available { test_what } { set profiling_available_saved 0 } else { set profiling_available_saved 1 + if { [check_no_compiler_messages_nocache profiling executable { + int main() { return 0; } } -pg] } { + set profiling_available_saved 1 + } else { + set profiling_available_saved 0 + } } } Here is the ChangeLog entry. 2015-01-13 H.J. Lu hongjiu...@intel.com * lib/target-supports.exp (check_profiling_available): Check if -pg links. H.J.
[PATCH] Fix PR64373
The following patch guards LTO against PARM_DECLs without DECL_CONTEXT. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-02-13 Richard Biener rguent...@suse.de PR lto/64373 * lto-streamer-out.c (tree_is_indexable): Guard for NULL DECL_CONTEXT. * gcc.dg/lto/pr64373_0.c: New testcase. Index: gcc/lto-streamer-out.c === --- gcc/lto-streamer-out.c (revision 219520) +++ gcc/lto-streamer-out.c (working copy) @@ -154,7 +154,8 @@ tree_is_indexable (tree t) /* Parameters and return values of functions of variably modified types must go to global stream, because they may be used in the type definition. */ - if (TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + if ((TREE_CODE (t) == PARM_DECL || TREE_CODE (t) == RESULT_DECL) + DECL_CONTEXT (t)) return variably_modified_type_p (TREE_TYPE (DECL_CONTEXT (t)), NULL_TREE); /* IMPORTED_DECL is put into BLOCK and thus it never can be shared. */ else if (TREE_CODE (t) == IMPORTED_DECL) Index: gcc/testsuite/gcc.dg/lto/pr64373_0.c === --- gcc/testsuite/gcc.dg/lto/pr64373_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64373_0.c(working copy) @@ -0,0 +1,10 @@ +/* { dg-lto-do assemble } */ + +extern void b(int L, float (*data)[L]); + +void a(void) +{ + float* p = 0; + int i = 0; + b(10, (float (*)[10])(p + i)); +}
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:04:26PM +0100, Richard Biener wrote: The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. I actually think it makes sense to disable -fvar-tracking-assignments just for specific function, e.g. when it is known to be too expensive on some large function you don't care about debug info quality too much, while you still don't want to disable it on the whole TU level, because you have other functions (e.g. small ones) you still want to be able to debug often with good coverage. So if this is fixable in the inliner and/or LTO in-streamer that would be my preference. Jakub
[PATCH] Fix PR64406
When a optimization pass in the loop pipeline moves stmts between loops or removes loops we have to reset the SCEV cache to not have stale CHREC_LOOPs. This patch does it for loop distribution for which I have a testcase. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64406 * tree-loop-distibution.c (pass_loop_distribution::execute): Reset the SCEV hashtable if we distributed anything. * gcc.dg/pr64406.c: New testcase. Index: gcc/tree-loop-distribution.c === --- gcc/tree-loop-distribution.c(revision 219520) +++ gcc/tree-loop-distribution.c(working copy) @@ -1838,6 +1851,9 @@ out: if (changed) { + /* Cached scalar evolutions now may refer to wrong or non-existing +loops. */ + scev_reset_htab (); mark_virtual_operands_for_renaming (fun); rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); } Index: gcc/testsuite/gcc.dg/pr64406.c === --- gcc/testsuite/gcc.dg/pr64406.c (revision 0) +++ gcc/testsuite/gcc.dg/pr64406.c (working copy) @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options -O -ftree-loop-distribute-patterns -fno-tree-loop-ivcanon -fno-tree-loop-vectorize -ftree-vectorize } */ + +unsigned in[72]; + +void bar (unsigned out[], unsigned ia[]); + +void +foo () +{ + int i; + unsigned out[72], ia[8]; + for (i = 0; i 8; i++) +{ + out[i * 8] = in[i * 8] + 5; + out[i * 8 + 1] = in[i * 8 + 1] + 6; + out[i * 8 + 2] = in[i * 8 + 2] + 7; + out[i * 8 + 3] = in[i * 8 + 3] + 8; + out[i * 8 + 4] = in[i * 8 + 4] + 9; + out[i * 8 + 5] = in[i * 8 + 5] + 10; + out[i * 8 + 6] = in[i * 8 + 6] + 11; + out[i * 8 + 7] = in[i * 8 + 7] + 12; + ia[i] = in[i]; +} + bar (out, ia); +}
[PATCH][ARM] PR 64149: Remove -mlra/-mno-lra option for ARM.
Hello, The LRA register alloator is enabled by default for the ARM backend and -mno-lra should no longer be used. This patch removes the -mlra/-mno-lra option from the ARM backend. arm-none-linux-gnueabihf passes gcc-check with no new failures. Matthew 2015-01-13 Matthew Wahab matthew.wa...@arm.com PR target/64149 * config/arm/arm.opt: Remove lra option and arm_lra_flag variable. * config/arm/arm.h (MODE_BASE_REG_CLASS): Remove use of arm_lra_flag, replace the conditional with it's true branch. * config/arm/arm.c (TARGET_LRA_P): Set to hook_bool_void_true. (arm_lra_p): Remove. * testsuite/gcc.target/arm/thumb1-far-jump-3.c: Remove.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8ca2dd8..e03e063 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -103,7 +103,6 @@ struct four_ints /* Forward function declarations. */ static bool arm_const_not_ok_for_debug_p (rtx); -static bool arm_lra_p (void); static bool arm_needs_doubleword_align (machine_mode, const_tree); static int arm_compute_static_chain_stack_bytes (void); static arm_stack_offsets *arm_get_frame_offsets (void); @@ -370,7 +369,7 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address #undef TARGET_LRA_P -#define TARGET_LRA_P arm_lra_p +#define TARGET_LRA_P hook_bool_void_true #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE arm_attribute_table @@ -5932,13 +5931,6 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, } } -/* Return true if we use LRA instead of reload pass. */ -static bool -arm_lra_p (void) -{ - return arm_lra_flag; -} - /* Return true if mode/type need doubleword alignment. 
*/ static bool arm_needs_doubleword_align (machine_mode mode, const_tree type) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 556e2da..ff38017 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1287,14 +1287,10 @@ enum reg_class /* For the Thumb the high registers cannot be used as base registers when addressing quantities in QI or HI mode; if we don't know the mode, then we must be conservative. */ -#define MODE_BASE_REG_CLASS(MODE) \ - (arm_lra_flag\ - ? (TARGET_32BIT ? CORE_REGS \ - : GET_MODE_SIZE (MODE) = 4 ? BASE_REGS\ - : LO_REGS) \ - : ((TARGET_ARM || (TARGET_THUMB2 !optimize_size)) ? CORE_REGS \ - : ((MODE) == SImode) ? BASE_REGS \ - : LO_REGS)) +#define MODE_BASE_REG_CLASS(MODE)\ + (TARGET_32BIT ? CORE_REGS \ + : GET_MODE_SIZE (MODE) = 4 ? BASE_REGS \ + : LO_REGS) /* For Thumb we can not support SP+reg addressing, so we return LO_REGS instead of BASE_REGS. */ diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 5385e4a..6da49b8 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -143,10 +143,6 @@ mfpu= Target RejectNegative Joined Enum(arm_fpu) Var(arm_fpu_index) Specify the name of the target floating point hardware/format -mlra -Target Report Var(arm_lra_flag) Init(1) Save -Use LRA instead of reload (transitional) - mhard-float Target RejectNegative Alias(mfloat-abi=, hard) Undocumented diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c deleted file mode 100644 index 90559ba..000 --- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-3.c +++ /dev/null @@ -1,108 +0,0 @@ -/* Catch reload ICE on target thumb1 with far jump optimization. - * It is also a valid case for non-thumb1 target. */ - -/* Add -mno-lra option as it is only reproducable with reload. It will - be removed after reload is completely removed. 
*/ -/* { dg-options -mno-lra -fomit-frame-pointer } */ -/* { dg-do compile } */ - -#define C 2 -#define A 4 -#define RGB (C | A) -#define GRAY (A) - -typedef unsigned long uint_32; -typedef unsigned char byte; -typedef byte* bytep; - -typedef struct ss -{ - uint_32 w; - uint_32 r; - byte c; - byte b; - byte p; -} info; - -typedef info * infop; - -void -foo(infop info, bytep row) -{ - uint_32 iw = info-w; - if (info-c == RGB) - { - if (info-b == 8) - { - bytep sp = row + info-r; - bytep dp = sp; - byte save; - uint_32 i; - - for (i = 0; i iw; i++) - { -save = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = save; - } - } - - else - { - bytep sp = row + info-r; - bytep dp = sp; - byte save[2]; - uint_32 i; - - for (i = 0; i iw; i++) - { -save[0] = *(--sp); -save[1] = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -*(--dp) = *(--sp); -
Re: [PATCH 2/4] Pipeline model for APM XGene-1.
On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: --- gcc/config/aarch64/aarch64.md | 1 + gcc/config/arm/xgene1.md | 531 ++ 2 files changed, 532 insertions(+) create mode 100644 gcc/config/arm/xgene1.md diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 12e1054..1f6b1b6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -190,6 +190,7 @@ (include ../arm/cortex-a53.md) (include ../arm/cortex-a15.md) (include thunderx.md) +(include ../arm/xgene1.md) Can we have a ChangeLog entry please. /Marcus
Re: [PATCH] Fix up computed goto on POINTERS_EXTEND_UNSIGNED targets (PR middle-end/63974)
On Mon, Jan 12, 2015 at 12:19 PM, Jakub Jelinek ja...@redhat.com wrote: Hi! The 991213-3.c testcase ICEs on aarch64-linux with -mabi=ilp32 since wide-int merge. The problem is that x = convert_memory_address (Pmode, x) is used twice on a VOIDmode CONST_INT, which is wrong. For non-VOIDmode rtl the second convert_memory_address is a NOP, but for VOIDmode the second call treats the CONST_INT returned by the first call as if it was again ptr_mode, rather than Pmode. On aarch64-linux in particular, the constant is zero-extended from SImode to DImode in the first call, so it is not valid SImode CONST_INT any longer. emit_indirect_jump always calls convert_memory_address (Pmode, ...) on the operand in optabs.c when handling EXPAND_ADDRESS case in maybe_legitimize_operand, so the first convert_memory_address is both unnecessary and harmful. Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux (which do not define POINTERS_EXTEND_UNSIGNED) and tested on the problematic testcase with aarch64-linux cross. Can anyone with easy access to POINTERS_EXTEND_UNSIGNED targets (aarch64-linux ilp32, x86_64 -mx32, ia64-hpux) please test this? Ok for trunk if it works there? 2015-01-12 Jakub Jelinek ja...@redhat.com PR middle-end/63974 * cfgexpand.c (expand_computed_goto): Don't call convert_memory_address here. --- gcc/cfgexpand.c.jj 2015-01-09 21:59:54.0 +0100 +++ gcc/cfgexpand.c 2015-01-12 14:41:35.210705174 +0100 @@ -3060,8 +3060,6 @@ expand_computed_goto (tree exp) { rtx x = expand_normal (exp); - x = convert_memory_address (Pmode, x); - do_pending_stack_adjust (); emit_indirect_jump (x); } No regressions on x32. -- H.J.
Re: [PATCH/AARCH64] Disable load/store pair peephole for volatile mem
On 10 December 2014 at 02:18, Andrew Pinski pins...@gmail.com wrote: Hi, As mentioned in https://gcc.gnu.org/ml/gcc-patches/2014-12/msg00609.html, the load/store pair peepholes currently accept volatile mem which can cause wrong code as the architecture does not define which part of the pair happens first. This patch disables the peephole for volatile mem and adds two testcases so that volatile loads are not converted into load pair (I could add the same for store pair if needed). In the second testcase, only f3 does not get converted to load pair, even though the order of the loads are different. OK? Bootstrapped and tested on aarch64-linux-gnu without any regressions. Thanks, Andrew Pinski ChangeLog: * config/aarch64/aarch64.c (aarch64_operands_ok_for_ldpstp): Reject volatile mems. (aarch64_operands_adjust_ok_for_ldpstp): Likewise. testsuite/ChangeLog: * gcc.target/aarch64/volatileloadpair-1.c: New testcase. * gcc.target/aarch64/volatileloadpair-2.c: New testcase. OK. Bin, Feel free to follow up with a patch to reorg the MEM_P /Marcus
Re: [testsuite] PATCH: Add check_effective_target_pie
On Mon, Jan 12, 2015 at 03:04:20PM -0700, Jeff Law wrote: On 01/12/15 14:51, Magnus Granberg wrote: måndag 12 januari 2015 12.11.17 skrev H.J. Lu: On Mon, Jan 12, 2015 at 12:03 PM, Jeff Law l...@redhat.com wrote: On 01/12/15 12:59, H.J. Lu wrote: I don't know if -pg will work PIE on any targets. For Linux/x86 the choices of crt1.o are %{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} -shared, -pg and -pie are mutually exclusive. Those crt1 files are only crt1 files provided by glibc. You can't even try -pg -pie on Linux without changing glibc. You're totally missing the point. What I care about is *why*. With -pg it use gcrt1.o object file and that file is not compile with -fPIC. When you build a shared lib on x86_64 all the objects files need to be buiit with -fPIC else you get a error like that one abow and it is the same problems when you build bin with -fPIE and linke with -pie. Glibc do not provide one that is compile with -fPIC Is there some reason why glibc could not provide gcrt1.o compiled with -fPIC? That is a good question. We can compile gcrt1.o with -fPIC and it will work with both -pg and -pg -pie. I will open a glibc bug. Here is the updated patch without the check_profiling_available change. OK for trunk? Thanks. H.J. --- Subject: [PATCH 1/5] Add check_effective_target_pie Hi, This patch adds check_effective_target_pie to check if the current multilib generates PIE by default. Thanks. H.J. --- 2015-01-11 H.J. Lu hongjiu...@intel.com * gcc.target/i386/pie.c: New test. * lib/target-supports.exp (check_effective_target_pie): New. 
--- gcc/testsuite/gcc.target/i386/pie.c | 12 gcc/testsuite/lib/target-supports.exp | 10 ++ 2 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pie.c diff --git a/gcc/testsuite/gcc.target/i386/pie.c b/gcc/testsuite/gcc.target/i386/pie.c new file mode 100644 index 000..0a9f5ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pie.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target pie } } */ +/* { dg-options -O2 } */ + +int foo (void); + +int +main (void) +{ + return foo (); +} + +/* { dg-final { scan-assembler foo@PLT } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f5c6db8..0ac9646 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1080,6 +1080,16 @@ proc check_effective_target_nonpic { } { }] } +# Return 1 if the current multilib generates PIE by default. + +proc check_effective_target_pie { } { +return [check_no_compiler_messages pie assembly { + #ifndef __PIE__ + #error unsupported + #endif +}] +} + # Return 1 if the target does not use a status wrapper. proc check_effective_target_unwrapped { } { -- 1.9.3
Re: [PATCH 1/4] Core definition for APM XGene-1 and associated cost-table.
On 12 January 2015 at 20:15, Philipp Tomsich philipp.toms...@theobroma-systems.com wrote: +2014-11-19 Philipp Tomsich philipp.toms...@theobroma-systems.com + + * config/aarch64/aarch64-cores.def (xgene1): Update/add the + xgene1 (APM XGene-1) core definition. + * gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1 + * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1 + * doc/invoke.texi: Document -mcpu=xgene1. + Fix the date in the ChangeLog entry... otherwise OK commit it. Thanks /Marcus
[PATCH] Fix PRs 64493 and 64495
The following fixes a bug in outer loop reduction vectorization which happens to use a bogus vectorized stmt for the inner loop exit PHI. Bootstrap and regtest in progress on x86_64-unknown-linux-gnu. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64493 PR tree-optimization/64495 * tree-vect-loop.c (vect_finalize_reduction): For double-reductions assign the proper vectorized PHI to the inner loop exit PHIs. * gcc.dg/vect/pr64493.c: New testcase. * gcc.dg/vect/pr64495.c: Likewise. Index: gcc/tree-vect-loop.c === --- gcc/tree-vect-loop.c(revision 219520) +++ gcc/tree-vect-loop.c(working copy) @@ -4580,7 +4580,10 @@ vect_finalize_reduction: !STMT_VINFO_LIVE_P (exit_phi_vinfo)) || double_reduc); - STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; + if (double_reduc) + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; + else + STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; if (!double_reduc || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) != vect_double_reduction_def) Index: gcc/testsuite/gcc.dg/vect/pr64493.c === --- gcc/testsuite/gcc.dg/vect/pr64493.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64493.c (working copy) @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include tree-vect.h + +int a, b, c, d, e, f, g, h; + +int +main () +{ + check_vect (); + + for (; a; a--) +for (d = 1; d = 0; d++) + for (; d;) + if (h) + { + if (!g) __builtin_abort (); + if (!0) __builtin_abort (); + } + + for (f = 4; f; f--) +{ + for (b = 0; b 2; b++) + c |= 1; + e |= c; +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */ Index: gcc/testsuite/gcc.dg/vect/pr64495.c === --- gcc/testsuite/gcc.dg/vect/pr64495.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64495.c (working copy) @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include assert.h +#include tree-vect.h + +int a, b, c, d, e, f, g, i, j; +static int *h = e; + +int +main () +{ + check_vect (); + + for (; a;) +for (; g; g++) + for (; f; f++) + if (j) + { + assert(b); + assert(0); + } + for (i = 24; i; 
i--) +{ + for (c = 0; c 6; c++) + d |= 1; + *h |= d; +} + + if (e != 1) +__builtin_abort (); + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */
Re: [PATCH] Fix PR64415
On Tue, Jan 13, 2015 at 02:26:39PM +0100, Richard Biener wrote: The following seems to work (for the testcase). Testing coverage of this mode will of course be bad. LGTM. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * tree-inline.c (insert_debug_decl_map): Check destination function MAY_HAVE_DEBUG_STMTS. (insert_init_debug_bind): Likewise. (insert_init_stmt): Remove redundant check. (remap_gimple_stmt): Drop debug stmts if the destination function has var-tracking assignments disabled. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. Jakub
Re: [PATCH] Fix PR64404
On Mon, 12 Jan 2015, Richard Biener wrote: I am testing the following patch to fix a latent bug in the vectorizer dealing with redundant DRs. Bootstrap and regtest pending on x86_64-unknown-linux-gnu. Which shows the patch is bogus. Instead we are not prepared to handle this situation. Thus the following patch rejects it, making the testcase a runtime one as well. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-01-13 Richard Biener rguent...@suse.de PR tree-optimization/64404 * tree-vect-stmts.c (vectorizable_load): Reject conflicting SLP types for CSEd loads. * gcc.dg/vect/pr64404.c: New testcase. Index: gcc/tree-vect-stmts.c === --- gcc/tree-vect-stmts.c (revision 219520) +++ gcc/tree-vect-stmts.c (working copy) @@ -5791,6 +5791,20 @@ vectorizable_load (gimple stmt, gimple_s group loads with negative dependence distance\n); return false; } + + /* Similarly when the stmt is a load that is both part of a SLP + instance and a loop vectorized stmt via the same-dr mechanism +we have to give up. 
*/ + if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + (STMT_SLP_TYPE (stmt_info) + != STMT_SLP_TYPE (vinfo_for_stmt +(STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +conflicting SLP types for CSEd load\n); + return false; + } } Index: gcc/testsuite/gcc.dg/vect/pr64404.c === --- gcc/testsuite/gcc.dg/vect/pr64404.c (revision 0) +++ gcc/testsuite/gcc.dg/vect/pr64404.c (working copy) @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-additional-options --param=sccvn-max-alias-queries-per-access=1 } */ + +#include tree-vect.h + +extern void abort (void); + +typedef struct +{ + int l, h; +} tFPinterval; + +tFPinterval X[1024]; +tFPinterval Y[1024]; +tFPinterval Z[1024]; + +void __attribute__((noinline)) +Compute (void) +{ + int d; + for (d = 0; d 1024; d++) +{ + Y[d].l = X[d].l + X[d].h; + Y[d].h = Y[d].l; + Z[d].l = X[d].l; + Z[d].h = X[d].h; +} +} + +int +main (void) +{ + int d; + + check_vect (); + + for (d = 0; d 1024; d++) +{ + X[d].l = d; + X[d].h = d + 1; + __asm__ volatile (); +} + + Compute (); + + for (d = 0; d 1024; d++) +{ + if (Y[d].l != X[d].l + X[d].h +|| Y[d].h != Y[d].l +|| Z[d].l != X[d].l +|| Z[d].h != X[d].h) + abort (); + __asm__ volatile (); +} + + return 0; +} + +/* { dg-final { cleanup-tree-dump vect } } */
[PATCH] Fix PR64415
The following removes -fvar-tracking-assignments from being eligible to the optimization attribute/pragma which fixes LTO operation for mixed inputs (LTO just drops debug stmts if the flag is false). In theory we could also fix inlining to do that when inlining debug stmts into a non-VTA function but I think allowing this kind of per-function IL flags is just silly. Thoughts? Thanks, Richard. 2015-01-13 Richard Biener rguent...@suse.de PR lto/64415 * common.opt (fvar-tracking-assignments): Remove 'Optimization' flag. (fvar-tracking-assignments-toggle): Likewise. * gcc.dg/lto/pr64415_0.c: New testcase. * gcc.dg/lto/pr64415_1.c: Likewise. Index: gcc/common.opt === --- gcc/common.opt (revision 219520) +++ gcc/common.opt (working copy) @@ -2397,13 +2397,13 @@ Perform variable tracking ; annotations. When flag_var_tracking_assignments == ; AUTODETECT_VALUE it will be set according to flag_var_tracking. fvar-tracking-assignments -Common Report Var(flag_var_tracking_assignments) Init(2) Optimization +Common Report Var(flag_var_tracking_assignments) Init(2) Perform variable tracking by annotating assignments ; Nonzero if we should toggle flag_var_tracking_assignments after ; processing options and computing its default. 
*/ fvar-tracking-assignments-toggle -Common Report Var(flag_var_tracking_assignments_toggle) Optimization +Common Report Var(flag_var_tracking_assignments_toggle) Toggle -fvar-tracking-assignments ; Positive if we should track uninitialized variables, negative if Index: gcc/testsuite/gcc.dg/lto/pr64415_0.c === --- gcc/testsuite/gcc.dg/lto/pr64415_0.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_0.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target fpic } */ +/* { dg-lto-options { { -O -flto -fpic } } } */ +/* { dg-extra-ld-options { -shared } } */ + +extern void bar(char *, int); + +extern char *baz; + +void foo() +{ + bar(baz, 0); +} Index: gcc/testsuite/gcc.dg/lto/pr64415_1.c === --- gcc/testsuite/gcc.dg/lto/pr64415_1.c(revision 0) +++ gcc/testsuite/gcc.dg/lto/pr64415_1.c(working copy) @@ -0,0 +1,17 @@ +/* { dg-options -g } */ + +extern int n; + +void bar(char *, int); + +inline void bar(char *s, int i) +{ + char *p = s; + +#ifdef V1 + if (i) +#else + if (n) +#endif +*s = 0; +}
Re: [PATCH] rs6000: Make rs6000_split_logical handle inverted 2nd operand (PR64358)
On Mon, Jan 12, 2015 at 6:52 PM, Pat Haugen pthau...@linux.vnet.ibm.com wrote: Following backport tested on 4.8/4.9 with no new regressions. Ok to commit to those branches? -Pat 2015-01-12 Pat Haugen pthau...@us.ibm.com Backport from mainline 2014-12-20 Segher Boessenkool seg...@kernel.crashing.org PR target/64358 * config/rs6000/rs6000.c (rs6000_split_logical_inner): Swap the input operands if only the second is inverted. * config/rs6000/rs6000.md (*boolc<mode>3_internal1 for BOOL_128): Swap BOOL_REGS_OP1 and BOOL_REGS_OP2. Correct arguments to rs6000_split_logical. (*boolc<mode>3_internal2 for TI2): Swap operands[1] and operands[2]. Okay. Thanks, David
Re: [PATCH] PR59448 - Promote consume to acquire
On 01/13/2015 09:59 AM, Richard Biener wrote: On Tue, Jan 13, 2015 at 3:56 PM, Andrew MacLeod amacl...@redhat.com wrote: Lengthy discussion : https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59448 Basically we can generate incorrect code for an atomic consume operation in some circumstances. The general feeling seems to be that we should simply promote all consume operations to an acquire operation until there is a better definition/understanding of the consume model and how GCC can track it. I proposed a simple patch in the PR, and I have not seen or heard of any dissenting opinion. We should get this in before the end of stage 3 I think. The problem with the patch in the PR is the memory model is immediately promoted from consume to acquire. This happens *before* any of the memmodel checks are made. If a consume is illegally specified (such as in a compare_exchange), it gets promoted to acquire and the compiler doesn't report the error because it never sees the consume. This new patch simply makes the adjustment after any errors are checked on the originally specified model. It bootstraps on x86_64-unknown-linux-gnu and passes all regression testing. I also built an aarch64 compiler and it appears to issue the LDAR as specified in the PR, but anyone with a vested interest really ought to check it out with a real build to be sure. OK for trunk? Why not patch get_memmodel? (not sure if that catches all cases) Richard. That was the original patch. The issue is that it promotes consume to acquire before any error checking gets to look at the model, so then we allow illegal specification of consume. (It actually triggers a failure in the testsuite) Andrew
[[ARM/AArch64][testsuite] 09/36] Add vsubhn, vraddhn and vrsubhn tests. Split vaddhn.c into vXXXhn.inc and vaddhn.c to share code with other new tests.
* gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vraddhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vsubhn.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Use code from vXXXhn.inc. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc new file mode 100644 index 000..0dbcc92 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc @@ -0,0 +1,50 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. */ +#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ + VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ +VECT_VAR(vector2, T1, W, N)); \ + vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) + +#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ + TEST_VADDHN1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE_64BITS_VARIANTS(vector64); + DECL_VARIABLE_128BITS_VARIANTS(vector1); + DECL_VARIABLE_128BITS_VARIANTS(vector2); + + clean_results (); + + /* Fill input vector1 and vector2 with arbitrary values */ + VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); + VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); + VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); + VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); + VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); + VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); + + VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); + VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); + VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); + VDUP(vector2, q, uint, u, 64, 2, 
(uint64_t)UINT32_MAX); + + TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); + TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4); + TEST_VADDHN(INSN_NAME, int, s, 64, 32, 2); + TEST_VADDHN(INSN_NAME, uint, u, 16, 8, 8); + TEST_VADDHN(INSN_NAME, uint, u, 32, 16, 4); + TEST_VADDHN(INSN_NAME, uint, u, 64, 32, 2); + + CHECK_RESULTS (TEST_MSG, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c index 58fd5ea..88c92f3 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c @@ -8,6 +8,9 @@ #include stdint.h #endif +#define INSN_NAME vaddhn +#define TEST_MSG VADDHN + /* Expected results. */ VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32 }; @@ -52,56 +55,4 @@ VECT_VAR_DECL(expected,poly,16,8) [] = { 0x, 0x, 0x, 0x, VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x, 0x, 0x, 0x }; -#define INSN_NAME vaddhn -#define TEST_MSG VADDHN - -#define FNNAME1(NAME) exec_ ## NAME -#define FNNAME(NAME) FNNAME1(NAME) - -void FNNAME (INSN_NAME) (void) -{ - /* Basic test: vec64=vaddhn(vec128_a, vec128_b), then store the result. 
*/ -#define TEST_VADDHN1(INSN, T1, T2, W, W2, N) \ - VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \ -VECT_VAR(vector2, T1, W, N)); \ - vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N)) - -#define TEST_VADDHN(INSN, T1, T2, W, W2, N)\ - TEST_VADDHN1(INSN, T1, T2, W, W2, N) - - DECL_VARIABLE_64BITS_VARIANTS(vector64); - DECL_VARIABLE_128BITS_VARIANTS(vector1); - DECL_VARIABLE_128BITS_VARIANTS(vector2); - - clean_results (); - - /* Fill input vector1 and vector2 with arbitrary values */ - VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1)); - VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1)); - VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1)); - VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1)); - VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1)); - VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1)); - - VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX); - VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX); - VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX); - VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX); - - TEST_VADDHN(INSN_NAME, int, s, 16, 8, 8); - TEST_VADDHN(INSN_NAME, int, s, 32, 16, 4);
[[ARM/AArch64][testsuite] 12/36] Add vmlal_n and vmlsl_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc new file mode 100644 index 000..a968584 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc @@ -0,0 +1,61 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = vmlxl_n(vector, vector2, val), + then store the result. */ +#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) \ + VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W2, N), \ + V); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V)\ + TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V) + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + + /* Choose multiplier arbitrarily. 
*/ + TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11); + TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22); + TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33); + TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c new file mode 100644 index 000..118068c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c @@ -0,0 +1,14 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlal_n +#define TEST_MSG VMLAL_N + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 }; + +#include vmlXl_n.inc diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c new file mode 100644 index 000..a26c69f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c @@ -0,0 +1,18 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlsl_n +#define TEST_MSG VMLSL_N + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfa4b, 0xfa4c, + 0xfa4d, 0xfa4e }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0xf4a6, + 0xf4a7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xef01, 0xef02, +0xef03, 0xef04 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xef01, +0xef02 }; + +#include vmlXl_n.inc -- 2.1.0
[[ARM/AArch64][testsuite] 07/36] Add vmla_lane and vmls_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc new file mode 100644 index 000..b644a0e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc @@ -0,0 +1,91 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMLX_LANE(VAR)\ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmlx_lane(vector, vector2, vector3, lane), + then store the result. 
*/ +#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N), \ + VECT_VAR(vector3, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V) \ + TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V) + + DECL_VMLX_LANE(vector); + DECL_VMLX_LANE(vector2); + DECL_VMLX_LANE(vector_res); + + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector3, float, 32, 2); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); + + VDUP(vector2, , int, s, 16, 4, 0x55); + VDUP(vector2, , int, s, 32, 2, 0x55); + VDUP(vector2, , uint, u, 16, 4, 0x55); + VDUP(vector2, , uint, u, 32, 2, 0x55); + VDUP(vector2, , float, f, 32, 2, 55.3f); + VDUP(vector2, q, int, s, 16, 8, 0x55); + VDUP(vector2, q, int, s, 32, 4, 0x55); + VDUP(vector2, q, uint, u, 16, 8, 0x55); + VDUP(vector2, q, uint, u, 32, 4, 0x55); + VDUP(vector2, q, float, f, 32, 4, 55.8f); + + VDUP(vector3, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0xBB); + VDUP(vector3, , float, f, 32, 2, 11.34f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2); + TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3); + TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2); + TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1); + TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1); + + CHECK_RESULTS (TEST_MSG, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c new file mode 100644 index 000..f4b89d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c @@ -0,0 +1,50 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmla +#define TEST_MSG VMLA_LANE + +/* Expected results. */ +VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
[[ARM/AArch64][testsuite] 10/36] Add vmlal and vmlsl tests.
* gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlal.c: New file. * gcc.target/aarch64/advsimd-intrinsics/vmlsl.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc new file mode 100644 index 000..1e6bab3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc @@ -0,0 +1,89 @@ +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + /* vector_res = OP(vector, vector3, vector4), + then store the result. */ +#define TEST_VMLXL1(INSN, T1, T2, W, W2, N)\ + VECT_VAR(vector_res, T1, W, N) = \ +INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector3, T1, W2, N), \ +VECT_VAR(vector4, T1, W2, N)); \ + vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_VMLXL(INSN, T1, T2, W, W2, N) \ + TEST_VMLXL1(INSN, T1, T2, W, W2, N) + + DECL_VARIABLE(vector, int, 16, 8); + DECL_VARIABLE(vector3, int, 8, 8); + DECL_VARIABLE(vector4, int, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + + DECL_VARIABLE(vector, int, 32, 4); + DECL_VARIABLE(vector3, int, 16, 4); + DECL_VARIABLE(vector4, int, 16, 4); + DECL_VARIABLE(vector_res, int, 32, 4); + + DECL_VARIABLE(vector, int, 64, 2); + DECL_VARIABLE(vector3, int, 32, 2); + DECL_VARIABLE(vector4, int, 32, 2); + DECL_VARIABLE(vector_res, int, 64, 2); + + DECL_VARIABLE(vector, uint, 16, 8); + DECL_VARIABLE(vector3, uint, 8, 8); + DECL_VARIABLE(vector4, uint, 8, 8); + DECL_VARIABLE(vector_res, uint, 16, 8); + + DECL_VARIABLE(vector, uint, 32, 4); + DECL_VARIABLE(vector3, uint, 16, 4); + DECL_VARIABLE(vector4, uint, 16, 4); + DECL_VARIABLE(vector_res, uint, 32, 4); + + DECL_VARIABLE(vector, uint, 64, 2); + DECL_VARIABLE(vector3, uint, 32, 2); + DECL_VARIABLE(vector4, uint, 32, 2); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + VLOAD(vector, buffer, q, 
int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, int, s, 64, 2); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, uint, u, 64, 2); + + VDUP(vector3, , int, s, 8, 8, 0x55); + VDUP(vector4, , int, s, 8, 8, 0xBB); + VDUP(vector3, , int, s, 16, 4, 0x55); + VDUP(vector4, , int, s, 16, 4, 0xBB); + VDUP(vector3, , int, s, 32, 2, 0x55); + VDUP(vector4, , int, s, 32, 2, 0xBB); + VDUP(vector3, , uint, u, 8, 8, 0x55); + VDUP(vector4, , uint, u, 8, 8, 0xBB); + VDUP(vector3, , uint, u, 16, 4, 0x55); + VDUP(vector4, , uint, u, 16, 4, 0xBB); + VDUP(vector3, , uint, u, 32, 2, 0x55); + VDUP(vector4, , uint, u, 32, 2, 0xBB); + + TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8); + TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4); + TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2); + TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8); + TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4); + TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2); + + CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c new file mode 100644 index 000..c147f31 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c @@ -0,0 +1,18 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +#define INSN_NAME vmlal +#define TEST_MSG VMLAL + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 0xe90a, + 0xe90b, 0xe90c, 0xe90d, 0xe90e }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a, +0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 }; + +#include vmlXl.inc diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c new file mode 100644 index 000..6c984ae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c @@ -0,0 +1,22 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include
[[ARM/AArch64][testsuite] 23/36] Add vmul_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c new file mode 100644 index 000..978cd9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c @@ -0,0 +1,104 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xace0, 0xb212 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b6, 0xc3ab }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, + 0xffd0, 0xffd4, 0xffd8, 0xffdc }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfde0, 0xfe02, + 0xfe24, 0xfe46 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, +0xccd0, 0xd114, 0xd558, 0xd99c }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xace0, 0xb212, +0xb744, 0xbc76 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b6, 0xc3ab, + 0xc39f, 0xc394 }; + +#define TEST_MSG VMUL_LANE +void exec_vmul_lane (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_lane(vector,vector2,lane), then store the result. 
*/ +#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N2),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + DECL_VARIABLE(vector2, float, 32, 2); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x22); + VDUP(vector2, , uint, u, 16, 4, 0x444); + VDUP(vector2, , uint, u, 32, 2, 0x532); + VDUP(vector2, , float, f, 32, 2, 22.8f); + + /* Choose lane arbitrarily. 
*/ + TEST_VMUL_LANE(, int, s, 16, 4, 4, 2); + TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); + TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); + TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); + TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); + TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); + TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); + TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); + TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); + TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ); +} + +int main (void) +{ + exec_vmul_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 24/36] Add vmul_n tests.
* gcc.target/aarch64/advsimd-intrinsics/vmul_n.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c new file mode 100644 index 000..be0ee65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c @@ -0,0 +1,96 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfde0, 0xfe02 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfbc0, 0xfc04 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b2, 0xc3a74000 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, + 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xf9a0, 0xfa06, + 0xfa6c, 0xfad2 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, +0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf780, 0xf808, +0xf890, 0xf918 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, + 0xc49b9333, 0xc4907667 }; + +#define INSN_NAME vmul_n +#define TEST_MSG VMUL_N + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ +#define DECL_VMUL(VAR) \ + DECL_VARIABLE(VAR, int, 16, 4); \ + DECL_VARIABLE(VAR, int, 32, 2); \ + DECL_VARIABLE(VAR, uint, 16, 4); \ + DECL_VARIABLE(VAR, uint, 32, 2); \ + DECL_VARIABLE(VAR, float, 32, 2);\ + DECL_VARIABLE(VAR, int, 16, 8); \ + DECL_VARIABLE(VAR, int, 32, 4); \ + DECL_VARIABLE(VAR, uint, 16, 8); \ + DECL_VARIABLE(VAR, uint, 32, 4); \ + DECL_VARIABLE(VAR, float, 32, 4) + + /* vector_res = vmul_n(vector,val), then store the result. 
*/ +#define TEST_VMUL_N(Q, T1, T2, W, N, L) \ + VECT_VAR(vector_res, T1, W, N) = \ +vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),\ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)) + + DECL_VMUL(vector); + DECL_VMUL(vector_res); + + clean_results (); + + /* Initialize vector from pre-initialized values. */ + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, int, s, 16, 8); + VLOAD(vector, buffer, q, int, s, 32, 4); + VLOAD(vector, buffer, q, uint, u, 16, 8); + VLOAD(vector, buffer, q, uint, u, 32, 4); + VLOAD(vector, buffer, q, float, f, 32, 4); + + /* Choose multiplier arbitrarily. */ + TEST_VMUL_N(, int, s, 16, 4, 0x11); + TEST_VMUL_N(, int, s, 32, 2, 0x22); + TEST_VMUL_N(, uint, u, 16, 4, 0x33); + TEST_VMUL_N(, uint, u, 32, 2, 0x44); + TEST_VMUL_N(, float, f, 32, 2, 22.3f); + TEST_VMUL_N(q, int, s, 16, 8, 0x55); + TEST_VMUL_N(q, int, s, 32, 4, 0x66); + TEST_VMUL_N(q, uint, u, 16, 8, 0x77); + TEST_VMUL_N(q, uint, u, 32, 4, 0x88); + TEST_VMUL_N(q, float, f, 32, 4, 88.9f); + + CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ); + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 25/36] Add vmull tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c new file mode 100644 index 000..3fdd51e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c @@ -0,0 +1,75 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,16,8) [] = { 0x100, 0xe1, 0xc4, 0xa9, + 0x90, 0x79, 0x64, 0x51 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9, +0xe890, 0xea79, 0xec64, 0xee51 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1, +0xffe400c4, 0xffe600a9 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffe00100, +0xffe200e1 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 0x5501, 0x5504, 0x5505, +0x5510, 0x5511, 0x5514, 0x5515 }; + +#define TEST_MSG VMULL +void exec_vmull (void) +{ + /* Basic test: y=vmull(x,x), then store the result. 
*/ +#define TEST_VMULL(T1, T2, W, W2, N) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector, T1, W, N)); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 8, 8); + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 8, 8); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector, poly, 8, 8); + DECL_VARIABLE(vector_res, int, 16, 8); + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 16, 8); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + DECL_VARIABLE(vector_res, poly, 16, 8); + + clean_results (); + + VLOAD(vector, buffer, , int, s, 8, 8); + VLOAD(vector, buffer, , int, s, 16, 4); + VLOAD(vector, buffer, , int, s, 32, 2); + VLOAD(vector, buffer, , uint, u, 8, 8); + VLOAD(vector, buffer, , uint, u, 16, 4); + VLOAD(vector, buffer, , uint, u, 32, 2); + VLOAD(vector, buffer, , poly, p, 8, 8); + + TEST_VMULL(int, s, 8, 16, 8); + TEST_VMULL(int, s, 16, 32, 4); + TEST_VMULL(int, s, 32, 64, 2); + TEST_VMULL(uint, u, 8, 16, 8); + TEST_VMULL(uint, u, 16, 32, 4); + TEST_VMULL(uint, u, 32, 64, 2); + TEST_VMULL(poly, p, 8, 16, 8); + + CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ); +} + +int main (void) +{ + exec_vmull (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 02/36] Be more verbose, and actually confirm that a test was checked.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (CHECK): Add trace. (CHECK_FP): Likewise. (CHECK_CUMULATIVE_SAT): Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 6464c66..2730a66 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -79,6 +79,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Floating-point variant. */ @@ -107,6 +108,7 @@ extern size_t strlen(const char *); abort(); \ } \ } \ +fprintf(stderr, "CHECKED %s\n", MSG); \ } /* Clean buffer with a non-zero pattern to help diagnose buffer @@ -323,6 +325,7 @@ extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2); strlen(COMMENT) > 0 ? COMMENT : ""); \ abort(); \ } \ +fprintf(stderr, "CHECKED CUMULATIVE SAT %s\n", MSG); \ } #define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment) \ -- 2.1.0
[[ARM/AArch64][testsuite] 26/36] Add vmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c new file mode 100644 index 000..d3aa879 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c @@ -0,0 +1,66 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 }; + +#define TEST_MSG VMULL_LANE +void exec_vmull_lane (void) +{ + /* vector_res = vmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VMULL_LANE(T1, T2, W, W2, N, L) \ + VECT_VAR(vector_res, T1, W2, N) =\ +vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ +VECT_VAR(vector2, T1, W, N), \ +L);\ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N)) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector, uint, 16, 4); + DECL_VARIABLE(vector, uint, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + DECL_VARIABLE(vector2, uint, 16, 4); + DECL_VARIABLE(vector2, uint, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + DECL_VARIABLE(vector_res, uint, 32, 4); + DECL_VARIABLE(vector_res, uint, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + VDUP(vector, , uint, u, 16, 4, 0x1000); + VDUP(vector, , uint, u, 32, 2, 0x1000); + + /* Initialize vector2. 
*/ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + VDUP(vector2, , uint, u, 16, 4, 0x4); + VDUP(vector2, , uint, u, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VMULL_LANE(int, s, 16, 32, 4, 2); + TEST_VMULL_LANE(int, s, 32, 64, 2, 1); + TEST_VMULL_LANE(uint, u, 16, 32, 4, 2); + TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ); +} + +int main (void) +{ + exec_vmull_lane (); + return 0; +} -- 2.1.0
[[ARM/AArch64][testsuite] 35/36] Add vqdmull_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c: New file. diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c new file mode 100644 index 000..12f2a6b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c @@ -0,0 +1,94 @@ +#include arm_neon.h +#include arm-neon-ref.h +#include compute-ref-data.h + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR(expected_cumulative_sat,int,16,4) = 0; +int VECT_VAR(expected_cumulative_sat,int,32,2) = 0; + +/* Expected results. */ +VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 }; + +/* Expected values of cumulative_saturation flag when saturation + occurs. */ +int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1; +int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1; + +/* Expected results when saturation occurs. */ +VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fff, 0x7fff, +0x7fff, 0x7fff }; +VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fff, +0x7fff }; + +#define INSN_NAME vqdmull +#define TEST_MSG VQDMULL_LANE + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. */ +#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ + VECT_VAR(vector_res, T1, W2, N) =\ +INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N),\ + VECT_VAR(vector2, T1, W, N),\ + L); \ + vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), \ +VECT_VAR(vector_res, T1, W2, N)); \ + CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQDMULL_LANE1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) + + DECL_VARIABLE(vector, int, 16, 4); + DECL_VARIABLE(vector, int, 32, 2); + DECL_VARIABLE(vector2, int, 16, 4); + DECL_VARIABLE(vector2, int, 32, 2); + + DECL_VARIABLE(vector_res, int, 32, 4); + DECL_VARIABLE(vector_res, int, 64, 2); + + clean_results (); + + /* Initialize vector. */ + VDUP(vector, , int, s, 16, 4, 0x1000); + VDUP(vector, , int, s, 32, 2, 0x1000); + + /* Initialize vector2. */ + VDUP(vector2, , int, s, 16, 4, 0x4); + VDUP(vector2, , int, s, 32, 2, 0x2); + + /* Choose lane arbitrarily. */ + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat, ); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat, ); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ); + + VDUP(vector, , int, s, 16, 4, 0x8000); + VDUP(vector2, , int, s, 16, 4, 0x8000); + VDUP(vector, , int, s, 32, 2, 0x8000); + VDUP(vector2, , int, s, 32, 2, 0x8000); + +#define TEST_MSG2 with saturation + TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat2, TEST_MSG2); + TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat2, TEST_MSG2); + + CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); + CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} -- 2.1.0