[Bug target/67351] Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread ubizjak at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

Uroš Bizjak ubizjak at gmail dot com changed:

   What|Removed |Added

 CC||rguenth at gcc dot gnu.org,
   ||ubizjak at gmail dot com

--- Comment #3 from Uroš Bizjak ubizjak at gmail dot com ---
(In reply to Uroš Bizjak from comment #2)
 (In reply to Allan Jensen from comment #0)
 
  Gcc will expand and detect field setting on 32-bit integers, but for some
  reason miss the opportunity on 64-bit.
 
 The immediates for 64bit logic insns are limited to sign-extended 32bit
 values, so this probably limits combine to combine several insns into one.

One example is:

(insn 8 6 9 2 (parallel [
(set (reg:DI 100)
(lshiftrt:DI (reg/v:DI 98 [ a ])
(const_int 48 [0x30])))
(clobber (reg:CC 17 flags))
]) test.cpp:63 538 {*lshrdi3_1}
 (expr_list:REG_UNUSED (reg:CC 17 flags)
(nil)))
(insn 9 8 10 2 (parallel [
(set (reg:DI 101)
(ashift:DI (reg:DI 100)
(const_int 48 [0x30])))
(clobber (reg:CC 17 flags))
]) test.cpp:63 504 {*ashldi3_1}
 (expr_list:REG_DEAD (reg:DI 100)
(expr_list:REG_UNUSED (reg:CC 17 flags)
(nil))))

combine tries to:

Trying 8 -> 9:
Failed to match this instruction:
(parallel [
(set (reg:DI 101)
(and:DI (reg/v:DI 98 [ a ])
(const_int -281474976710656 [0xffff000000000000])))
(clobber (reg:CC 17 flags))
])

However, tree optimizers pass to expand the following sequence:

  a = giveMe64 ();
  a$rgba_5 = MEM[(struct MyRgba64 *)a];
  _6 = a$rgba_5 >> 16;
  _7 = a$rgba_5 >> 48;
  _8 = _7 << 48;
  _10 = _6 << 16;
  _11 = _10 & 4294967295;
  _13 = a$rgba_5 & 65535;
  _15 = _13 | 264913582817280;
  _16 = _8 | _15;
  _14 = _11 | _16;
  MEM[(struct MyRgba64 *)D.2451] = _14;
  return D.2451;

Richi, can these shifts be converted to equivalent masking in tree optimizers?

[gomp4.1] comment some stuff

2015-08-25 Thread Aldy Hernandez
I'm obviously not smart enough to understand libgomp's tasking runtime, 
and rth and you get 0 for commenting skills ;-).


I had some notes scribbled down while reading the code, and figured 
someone else might read this code some day.  It's still in dire need of 
commenting, but this mildly helps.


OK for branch?
commit 5fc2816946c9250c4cca43d002b364b2d6400919
Author: Aldy Hernandez al...@redhat.com
Date:   Tue Aug 25 10:32:48 2015 -0700

* env.c: Make gomp_max_task_priority_var static.
* libgomp.h (struct gomp_task_depend_entry): Add comment.
* task.c (gomp_clear_parent): Document function.
(GOMP_task): Same.
(gomp_task_run_pre): Add comments.
(gomp_task_run_post_handle_dependers): Same.
(gomp_task_run_post_remove_parent): Same.
(gomp_task_run_post_remove_taskgroup): Same.
(GOMP_taskwait): Same.
(gomp_task_maybe_wait_for_dependencies): Same.

diff --git a/libgomp/env.c b/libgomp/env.c
index 65a6851..0569521 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -69,7 +69,7 @@ struct gomp_task_icv gomp_global_icv = {
 
 unsigned long gomp_max_active_levels_var = INT_MAX;
 bool gomp_cancel_var = false;
-int gomp_max_task_priority_var = 0;
+static int gomp_max_task_priority_var = 0;
 #ifndef HAVE_SYNC_BUILTINS
 gomp_mutex_t gomp_managed_threads_lock;
 #endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 9031649..3d705ef 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -279,6 +279,7 @@ struct gomp_task_depend_entry
   struct gomp_task_depend_entry *next;
   struct gomp_task_depend_entry *prev;
   struct gomp_task *task;
+  /* Depend entry is of type IN.  */
   bool is_in;
   bool redundant;
   bool redundant_out;
diff --git a/libgomp/task.c b/libgomp/task.c
index f2a0fae..7c7bae4 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -92,6 +92,8 @@ gomp_end_task (void)
   thr-task = task-parent;
 }
 
+/* Orphan the task in CHILDREN and all its siblings.  */
+
 static inline void
 gomp_clear_parent (struct gomp_task *children)
 {
@@ -110,7 +112,12 @@ static void gomp_task_maybe_wait_for_dependencies (void 
**depend);
 
 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
false, then we must not delay in executing the task.  If UNTIED is true,
-   then the task may be executed by any member of the team.  */
+   then the task may be executed by any member of the team.
+
+   DEPEND is an array containing:
+   depend[0]: number of depend elements.
+   depend[1]: number of depend elements of type out.
+   depend[N+2]: address of [0..N]th depend element.  */
 
 void
 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
@@ -444,8 +451,10 @@ gomp_task_run_pre (struct gomp_task *child_task, struct 
gomp_task *parent,
 {
   if (parent)
 {
+  /* Remove child_task from parent.  */
   if (parent-children == child_task)
parent-children = child_task-next_child;
+
   if (__builtin_expect (child_task-parent_depends_on, 0)
   parent-taskwait-last_parent_depends_on == child_task)
{
@@ -456,8 +465,10 @@ gomp_task_run_pre (struct gomp_task *child_task, struct 
gomp_task *parent,
parent-taskwait-last_parent_depends_on = NULL;
}
 }
+  /* Remove child_task from taskgroup.  */
   if (taskgroup  taskgroup-children == child_task)
 taskgroup-children = child_task-next_taskgroup;
+
   child_task-prev_queue-next_queue = child_task-next_queue;
   child_task-next_queue-prev_queue = child_task-prev_queue;
   if (team-task_queue == child_task)
@@ -528,6 +539,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task 
*child_task,
  if (parent-taskwait  parent-taskwait-last_parent_depends_on
   !task-parent_depends_on)
{
+ /* Put task in last_parent_depends_on.  */
  struct gomp_task *last_parent_depends_on
= parent-taskwait-last_parent_depends_on;
  task-next_child = last_parent_depends_on-next_child;
@@ -535,6 +547,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task 
*child_task,
}
  else
{
+ /* Put task at the top of the sibling list.  */
  task-next_child = parent-children;
  task-prev_child = parent-children-prev_child;
  parent-children = task;
@@ -544,6 +557,7 @@ gomp_task_run_post_handle_dependers (struct gomp_task 
*child_task,
}
  else
{
+ /* Put task in the sibling list.  */
  task-next_child = task;
  task-prev_child = task;
  parent-children = task;
@@ -628,12 +642,18 @@ gomp_task_run_post_handle_depend (struct gomp_task 
*child_task,
   return gomp_task_run_post_handle_dependers (child_task, team);
 }
 
+/* Remove CHILD_TASK from its parent.  */
+
 static inline void
 gomp_task_run_post_remove_parent (struct 

Re: [gomp4.1] comment some stuff

2015-08-25 Thread Aldy Hernandez

On 08/25/2015 10:35 AM, Aldy Hernandez wrote:

-int gomp_max_task_priority_var = 0;
+static int gomp_max_task_priority_var = 0;


Sorry I snuck that in there.  The variable is unused elsewhere, might as 
well make it static.


Aldy


[Bug target/67351] Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread pinskia at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

--- Comment #4 from Andrew Pinski pinskia at gcc dot gnu.org ---
(In reply to Uroš Bizjak from comment #3)
 (In reply to Uroš Bizjak from comment #2)
  (In reply to Allan Jensen from comment #0)
  
   Gcc will expand and detect field setting on 32-bit integers, but for some
   reason miss the opportunity on 64-bit.
  
  The immediates for 64bit logic insns are limited to sign-extended 32bit
  values, so this probably limits combine to combine several insns into one.
 
 One example is:
 
 (insn 8 6 9 2 (parallel [
 (set (reg:DI 100)
 (lshiftrt:DI (reg/v:DI 98 [ a ])
 (const_int 48 [0x30])))
 (clobber (reg:CC 17 flags))
 ]) test.cpp:63 538 {*lshrdi3_1}
  (expr_list:REG_UNUSED (reg:CC 17 flags)
 (nil)))
 (insn 9 8 10 2 (parallel [
 (set (reg:DI 101)
 (ashift:DI (reg:DI 100)
 (const_int 48 [0x30])))
 (clobber (reg:CC 17 flags))
 ]) test.cpp:63 504 {*ashldi3_1}
  (expr_list:REG_DEAD (reg:DI 100)
 (expr_list:REG_UNUSED (reg:CC 17 flags)
 (nil))))
 
 combine tries to:
 
 Trying 8 -> 9:
 Failed to match this instruction:
 (parallel [
 (set (reg:DI 101)
 (and:DI (reg/v:DI 98 [ a ])
 (const_int -281474976710656 [0xffff000000000000])))
 (clobber (reg:CC 17 flags))
 ])
 
 However, tree optimizers pass to expand the following sequence:
 
   a = giveMe64 ();
   a$rgba_5 = MEM[(struct MyRgba64 *)a];
   _6 = a$rgba_5 >> 16;
   _7 = a$rgba_5 >> 48;
   _8 = _7 << 48;
   _10 = _6 << 16;
   _11 = _10 & 4294967295;
   _13 = a$rgba_5 & 65535;
   _15 = _13 | 264913582817280;
   _16 = _8 | _15;
   _14 = _11 | _16;
   MEM[(struct MyRgba64 *)D.2451] = _14;
   return D.2451;
 
 Richi, can these shifts be converted to equivalent masking in tree
 optimizers?


They should be or at least Naveen's patches should handle them.  There is an
open bug filed doing a << N >> N and one filed for a >> N << N already (I filed
it).

[gomp-4.1] fix incorrect memory size in goacc_new_thread

2015-08-25 Thread Aldy Hernandez
This is either blatantly wrong or subtly correct, in which case it needs 
a comment.  My guess is the former.


OK for branch?
commit 330391636113ed9a9067e6eb639755fb0f4723dc
Author: Aldy Hernandez al...@redhat.com
Date:   Tue Aug 25 10:41:28 2015 -0700

* oacc-init.c (goacc_new_thread): Use correct size of goacc_thread
when allocating memory.

diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index c91731e..e6d2c03 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -312,7 +312,7 @@ acc_shutdown_1 (acc_device_t d)
 static struct goacc_thread *
 goacc_new_thread (void)
 {
-  struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread));
+  struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
 
 #if defined HAVE_TLS || defined USE_EMUTLS
   goacc_tls_data = thr;


Re: Fix libbacktrace -fPIC breakage from Use libbacktrace in libgfortran

2015-08-25 Thread Ulrich Weigand
Hans-Peter Nilsson wrote:
  From: Ulrich Weigand uweig...@de.ibm.com
  Date: Tue, 25 Aug 2015 14:59:05 +0200
 
  The other GCC run-time libraries rely on libtool to figure out
  that even though -fPIC works, dynamic libraries are still not
  supported on the platform, and thus compile everything for
  static linking (i.e. without -fPIC).
 
 That's not what I see, at least not the figuring out part.
 (They mostly use libtool as-is; some test tuples, but some test
 version-script support and add it then.)

Well, the figuring out is implicit; because libtool knows the
platform does not support dynamic linking, it defaults to
--disable-shared, which means only static libraries are being
built; and the default compile option when building static
libraries does not use -fPIC.


 I'll leave that to you to sort out, but if you chose to use
 $can_build_shared, consider also setting PIC_FLAG to $pic_flag
 (instead of plain -fPIC).  In the meantime I'll commit my patch
 as it solves *some* of the breakage; for targets erroring on -fPIC.
 
 ...but reading the libtool documentation I think I found a much
 better solution: Let's just add -prefer-pic when compiling
 libbacktrace.  It leaves everything to libtool.  Can you please
 test this?

Hmm, reading the documentation an even simpler version that has
equivalent effect to yours should be just adding the pic-only
option when calling LT_INIT.

However, neither works for the SPU, because in both cases libtool
will only do the test whether the target supports the -fPIC option.
It will not test whether the target supports dynamic libraries.

[ It will do that test; and default to --disable-shared on SPU.
That is a no-op for libbacktrace however, since it calls LT_INIT
with the disable-shared option anyway.  When adding back the -fPIC
flag due to either the pic-only LT_INIT option or the -prefer-pic
libtool command line option, it does not check for that again.  ]

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



[Bug target/67351] Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread pinskia at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

--- Comment #5 from Andrew Pinski pinskia at gcc dot gnu.org ---
Oh his patch only handled multiplies/divide and not shifts.  But it should be
easy to add them to match.pd to simplify this at the tree level.


Re: [libgfortran,patch] Remove never-used debugging code

2015-08-25 Thread FX
Turns out I missed some of the dead code. And I now also fixed comments and 
some formatting.
libgfortran/runtime/environ.c is now much more readable than before.
The patch is still a no-op, in terms of user functionality.
OK to commit to trunk?

FX




unusedcode.ChangeLog
Description: Binary data


unusedcode.diff
Description: Binary data


[nvptx] More gcc testsuite markup

2015-08-25 Thread Nathan Sidwell

I've committed this to markup more test requirements.  Most are obvious enough.

nvptx doesn't expose a normal stack, so stack-based tests fail.  It also 
requires correct typing on function calls, so lying about that results in 
assembler errors.  Finally, it doesn't accept string constants, requiring 
expansion to an array of ints.


nathan
2015-08-25  Nathan Sidwell  nat...@acm.org

	* gcc.dg/20001117-1.c: Needs return_address.
	* gcc.dg/20020415-1.c: Needs alloca.
	* gcc.dg/graphite/id-pr44676.c: Needs profiling.
	* gcc.dg/graphite/pr60979.c: Needs nonlocal_goto
	* gcc.dg/pr63186.c: Needs label_values.
	* gcc.dg/torture/pr33848.c: Likewise.

	* lib/target-supports.exp (check_effective_target_fopenacc,
	check_effective_target_fopenmp): Disable for nvptx.
	* gcc.dg/graphite/run-id-pr47653.c: Disable for nvptx.
	* gcc.dg/stack-usage-1.c: Likewise.
	* gcc.dg/stack-usage-2.c: Likewise.
	* gcc.dg/unused-5.c: Likewise.
	* gcc.dg/unwind-1.c: Likewise.

Index: gcc.dg/20001117-1.c
===
--- gcc.dg/20001117-1.c	(revision 227166)
+++ gcc.dg/20001117-1.c	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options -O2 -finstrument-functions } */
+/* { dg-require-effective-target return_address } */
 
 extern void abort (void);
 extern void exit (int);
Index: gcc.dg/20020415-1.c
===
--- gcc.dg/20020415-1.c	(revision 227166)
+++ gcc.dg/20020415-1.c	(working copy)
@@ -1,9 +1,9 @@
 /* PR target/6303
This testcase ICEd because s390 did not define
ASM_SIMPLIFY_DWARF_ADDR hook.  */
-/* { dg-require-effective-target alloca } */
 /* { dg-do compile { target fpic } } */
 /* { dg-options -O2 -fpic -g } */
+/* { dg-require-effective-target alloca } */
 
 static inline char *
 bar (unsigned long x, char *y)
Index: gcc.dg/graphite/id-pr44676.c
===
--- gcc.dg/graphite/id-pr44676.c	(revision 227166)
+++ gcc.dg/graphite/id-pr44676.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-options -O2 -fgraphite-identity -fprofile-generate } */
+/* { dg-require-profiling -fprofile-generate } */
 
 int
 extend_options (int h, int map, int x, int y, int dx)
Index: gcc.dg/graphite/pr60979.c
===
--- gcc.dg/graphite/pr60979.c	(revision 227166)
+++ gcc.dg/graphite/pr60979.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-options -O -fgraphite-identity } */
+/* { dg-require-effective-target nonlocal_goto } */
 
 #include setjmp.h
 
Index: gcc.dg/graphite/run-id-pr47653.c
===
--- gcc.dg/graphite/run-id-pr47653.c	(revision 227166)
+++ gcc.dg/graphite/run-id-pr47653.c	(working copy)
@@ -1,4 +1,6 @@
 /* { dg-options -O -fstack-check=generic -ftree-pre -fgraphite-identity } */
+/* nvptx doesn't expose a stack.  */
+/* { dg-skip-if  { nvptx-*-* } { * } {  } } */
 
 int main ()
 {
Index: gcc.dg/pr63186.c
===
--- gcc.dg/pr63186.c	(revision 227166)
+++ gcc.dg/pr63186.c	(working copy)
@@ -1,5 +1,7 @@
 /* { dg-do link } */
 /* { dg-options -O2 } */
+/* { dg-require-effective-target label_values } */
+
 void *a;
 int b, c, d;
 
Index: gcc.dg/stack-usage-1.c
===
--- gcc.dg/stack-usage-1.c	(revision 227166)
+++ gcc.dg/stack-usage-1.c	(working copy)
@@ -1,5 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options -fstack-usage } */
+/* nvptx doesn't have a reg allocator, and hence no stack usage data.  */
+/* { dg-skip-if  { nvptx-*-* } { * } {  } } */
 
 /* This is aimed at testing basic support for -fstack-usage in the back-ends.
See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c).
Index: gcc.dg/stack-usage-2.c
===
--- gcc.dg/stack-usage-2.c	(revision 227166)
+++ gcc.dg/stack-usage-2.c	(working copy)
@@ -1,5 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options -Wstack-usage=512 } */
+/* nvptx gets very upset with mismatched function types.  */
+/* { dg-skip-if  { nvptx-*-* } { * } {  } } */
 
 int foo1 (void)  /* { dg-bogus stack usage } */
 {
Index: gcc.dg/torture/pr33848.c
===
--- gcc.dg/torture/pr33848.c	(revision 227166)
+++ gcc.dg/torture/pr33848.c	(working copy)
@@ -1,4 +1,3 @@
-/* { dg-require-effective-target label_values } */
 /* foo should be hoisted, but on most targets, excess register pressure
forces it to be rematerialized before data != foo.  On targets that
have a branch if registers are equal instruction, this leads to the
@@ -7,6 +6,7 @@
say that foo was the target of the branch, and the real target would
then be removed as dead.  */
 /* { dg-do link } */
+/* { dg-require-effective-target label_values } */
 #define NVARS 30
 #define MULTI(X) \
   

[Bug target/67351] Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread ubizjak at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

--- Comment #2 from Uroš Bizjak ubizjak at gmail dot com ---
(In reply to Allan Jensen from comment #0)

 Gcc will expand and detect field setting on 32-bit integers, but for some
 reason miss the opportunity on 64-bit.

The immediates for 64bit logic insns are limited to sign-extended 32bit values,
so this probably limits combine to combine several insns into one.

[Bug target/67317] [x86] Silly code generation for _addcarry_u32/_addcarry_u64

2015-08-25 Thread ubizjak at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67317

--- Comment #4 from Uroš Bizjak ubizjak at gmail dot com ---
(In reply to Segher Boessenkool from comment #3)

 Does this need to be an unspec at all?

Of course not. We are looking to replace unspecs with standard RTXes. Do you
have any recommendation on how we can represent this carry-setting insn to
satisfy combine?

Re: [PATCH] rs6000: Fix PR67344

2015-08-25 Thread David Edelsohn
On Tue, Aug 25, 2015 at 1:08 PM, Segher Boessenkool
seg...@kernel.crashing.org wrote:
 The *andmode3_imm_dot_shifted pattern is a define_insn_and_split,
 like most dot patterns: if its output is not assigned cr0 but some
 other cr reg, it splits to a non-dot insn and a compare.

 Unfortunately that non-dot insn will clobber cr0 as well.  We could
 add another clobber (with =X,x), but then that second alternative
 is never useful; instead, just remove that second alternative.

 Bootstrapped and tested on powerpc64-linux; is this okay for trunk?


 Segher


 2015-08-25  Segher Boessenkool  seg...@kernel.crashing.org

 PR target/67344
 * config/rs6000/rs6000.md (*andmode3_imm_dot_shifted): Change to
 a define_insn, remove second alternative.

Okay.

Thanks, David


[PATCH] Don't ICE on invalid weak decl (PR middle-end/67330)

2015-08-25 Thread Marek Polacek
Here we are ICEing on an invalid code: symtab_node::get asserts that it's
dealing with a function or a static or external variable, but an invalid
decl is rejected too late.  So don't try to mark_weak an invalid decl and
also don't duplicate the declared weak after being used check -- that is
already in mark_weak.
Perhaps we should also punt if (!TARGET_SUPPORTS_WEAK)?

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-08-25  Marek Polacek  pola...@redhat.com

PR middle-end/67330
* varasm.c (declare_weak): Return after giving an error.

* c-common.c (handle_weak_attribute): Don't check whether the
visibility can be changed here.

* gcc.dg/weak/weak-18.c: New test.

diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
index ff502e5..7691035 100644
--- gcc/c-family/c-common.c
+++ gcc/c-family/c-common.c
@@ -8328,12 +8328,7 @@ handle_weak_attribute (tree *node, tree name,
   return NULL_TREE;
 }
   else if (VAR_OR_FUNCTION_DECL_P (*node))
-{
-  struct symtab_node *n = symtab_node::get (*node);
-  if (n  n-refuse_visibility_changes)
-   error (%+D declared weak after being used, *node);
-  declare_weak (*node);
-}
+declare_weak (*node);
   else
 warning (OPT_Wattributes, %qE attribute ignored, name);
 
diff --git gcc/testsuite/gcc.dg/weak/weak-18.c 
gcc/testsuite/gcc.dg/weak/weak-18.c
index e69de29..ebeb4d5 100644
--- gcc/testsuite/gcc.dg/weak/weak-18.c
+++ gcc/testsuite/gcc.dg/weak/weak-18.c
@@ -0,0 +1,9 @@
+/* PR middle-end/67330 */
+/* { dg-do compile } */
+/* { dg-require-weak  } */
+
+void
+f (void)
+{
+  __attribute__ ((weak)) int a; /* { dg-error weak declaration of .a. must be 
public } */
+}
diff --git gcc/varasm.c gcc/varasm.c
index 7fa2e7b..d9290a1 100644
--- gcc/varasm.c
+++ gcc/varasm.c
@@ -5403,7 +5403,10 @@ declare_weak (tree decl)
 {
   gcc_assert (TREE_CODE (decl) != FUNCTION_DECL || !TREE_ASM_WRITTEN (decl));
   if (! TREE_PUBLIC (decl))
-error (weak declaration of %q+D must be public, decl);
+{
+  error (weak declaration of %q+D must be public, decl);
+  return;
+}
   else if (!TARGET_SUPPORTS_WEAK)
 warning (0, weak declaration of %q+D not supported, decl);
 

Marek


Re: [libgfortran,patch] Remove never-used debugging code

2015-08-25 Thread Steve Kargl
On Tue, Aug 25, 2015 at 07:10:23PM +0200, FX wrote:
  Certainly, the dead code can go.  But,is this changing the library ABI?
  
  troutmask:fvwm:kargl[764] nm /mnt/sgk/work/6/lib/libgfortran.a | grep show_
   T _gfortrani_show_variables
   t show_boolean
   t show_integer
   t show_sep
   t show_string
   T _gfortrani_show_locus
 
 Nope, none of those functions are actually publicly exported.
 They are not in gfortran.map, being either static, or having
 _gfortrani_ prefix which means internal libgfortran use.
 

OK. Just checking.  Thanks for the code cleanup.

-- 
Steve


RE: [PATCH] MIPS: If a test in the MIPS testsuite requires standard library support check the sysroot supports the required test options.

2015-08-25 Thread Moore, Catherine


 -Original Message-
 From: Andrew Bennett [mailto:andrew.benn...@imgtec.com]
 Sent: Tuesday, July 21, 2015 10:15 AM
 To: gcc-patches@gcc.gnu.org
 Cc: Matthew Fortune; Moore, Catherine
 Subject: [PATCH] MIPS: If a test in the MIPS testsuite requires standard
 library support check the sysroot supports the required test options.
 
 Hi,
 
 The recent changes to the MIPS GCC Linux sysroot
 (https://gcc.gnu.org/ml/gcc-patches/2015-06/msg01014.html) have meant
 that the include directory is now not global and is provided only for each
 multi-lib configuration.  This means that for any test in the MIPS GCC
 Testsuite that requires standard library support we need to check if there is 
 a
 multi-lib support for the test options, otherwise it might fail to compile.
 
 This patch adds this support to the testsuite and mips.exp files.  Firstly any
 test that requires standard library support has the implicit option
 (REQUIRES_STDLIB) added to its dg-options.  Secondly in mips.exp a pre-
 processor check is performed to ensure that when expanding a testcase
 containing a #include stdlib.h using the current set of test options we do
 not get file not found errors.  If this happens we mark the testcase as
 unsupported.
 
 The patch has been tested on the mti/img elf/linux-gnu toolchains, and there
 have been no new regressions.
 
 The patch and ChangeLog are below.
 
 Ok to commit?
 
 
Yes.  This looks good.


[gomp4] another routine test

2015-08-25 Thread Nathan Sidwell

I've committed this test to check 2-dimensional loops inside a routine.

nathan
2015-08-24  Nathan Sidwell  nat...@codesourcery.com

	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: New.

Index: testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
===
--- testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c	(revision 0)
+++ testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c	(revision 0)
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-additional-options -O1 } */
+
+#include stdio.h
+#include openacc.h
+
+#define NUM_WORKERS 16
+#define NUM_VECTORS 32
+#define WIDTH 64
+#define HEIGHT 32
+
+#define WORK_ID(I,N)		\
+  (acc_on_device (acc_device_nvidia)\
+   ? ({unsigned __r;		\
+   __asm__ volatile (mov.u32 %0,%%tid.y; : =r (__r));	\
+   __r; }) : (I % N))
+#define VEC_ID(I,N)		\
+  (acc_on_device (acc_device_nvidia)\
+   ? ({unsigned __r;		\
+   __asm__ volatile (mov.u32 %0,%%tid.x; : =r (__r));	\
+   __r; }) : (I % N))
+
+#pragma acc routine worker
+void __attribute__ ((noinline))
+  WorkVec (int *ptr, int w, int h, int nw, int nv)
+{
+#pragma acc loop worker
+  for (int i = 0; i  h; i++)
+#pragma acc loop vector
+for (int j = 0; j  w; j++)
+  ptr[i*w + j] = (WORK_ID (i, nw)  8) | VEC_ID(j, nv);
+}
+
+int DoWorkVec (int nw)
+{
+  int ary[HEIGHT][WIDTH];
+  int err = 0;
+
+  for (int ix = 0; ix != HEIGHT; ix++)
+for (int jx = 0; jx != WIDTH; jx++)
+  ary[ix][jx] = 0xdeadbeef;
+
+  printf (spawning %d ..., nw); fflush (stdout);
+  
+#pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
+  {
+WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
+  }
+
+  for (int ix = 0; ix != HEIGHT; ix++)
+for (int jx = 0; jx != WIDTH; jx++)
+  {
+	int exp = ((ix % nw)  8) | (jx % NUM_VECTORS);
+	
+	if (ary[ix][jx] != exp)
+	  {
+	printf (\nary[%d][%d] = %#x expected %#x, ix, jx,
+		ary[ix][jx], exp);
+	err = 1;
+	  }
+  }
+  printf (err ?  failed\n :  ok\n);
+  
+  return err;
+}
+
+int main ()
+{
+  int err = 0;
+
+  for (int W = 1; W = NUM_WORKERS; W = 1)
+err |= DoWorkVec (W);
+
+  return err;
+}


Expand comment on struct switchstr

2015-08-25 Thread Joseph Myers
This patch, extracted from a larger change on gomp-4_0-branch, expands
a comment documenting struct switchstr in gcc.c.

Committed.

2015-08-25  Thomas Schwinge  tho...@codesourcery.com
Joseph Myers  jos...@codesourcery.com

* gcc.c (struct switchstr): Expand comment.

Index: gcc.c
===
--- gcc.c   (revision 227171)
+++ gcc.c   (working copy)
@@ -3068,10 +3068,15 @@
SWITCH_LIVE to indicate this switch is true in a conditional spec.
SWITCH_FALSE to indicate this switch is overridden by a later switch.
SWITCH_IGNORE to indicate this switch should be ignored (used in %S).
-   SWITCH_IGNORE_PERMANENTLY to indicate this switch should be ignored
+   SWITCH_IGNORE_PERMANENTLY to indicate this switch should be ignored.
+   SWITCH_KEEP_FOR_GCC to indicate that this switch, otherwise ignored,
+   should be included in COLLECT_GCC_OPTIONS.
in all do_spec calls afterwards.  Used for %S from self specs.
-   The `validated' field is nonzero if any spec has looked at this switch;
-   if it remains zero at the end of the run, it must be meaningless.  */
+   The `known' field describes whether this is an internal switch.
+   The `validated' field describes whether any spec has looked at this switch;
+   if it remains false at the end of the run, the switch must be meaningless.
+   The `ordering' field is used to temporarily mark switches that have to be
+   kept in a specific order.  */
 
 #define SWITCH_LIVE(1  0)
 #define SWITCH_FALSE   (1  1)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [AArch64][TLSLE][1/3] Add the option -mtls-size for AArch64

2015-08-25 Thread Jiong Wang

Marcus Shawcroft writes:

 On 19 August 2015 at 15:26, Jiong Wang jiong.w...@arm.com wrote:

 2015-08-19  Jiong Wang  jiong.w...@arm.com

 gcc/
   * config/aarch64/aarch64.opt (mtls-size): New entry.
   * config/aarch64/aarch64.c (initialize_aarch64_tls_size): New function.
   (aarch64_override_options_internal): Call initialize_aarch64_tls_size.
   * doc/invoke.texi (AArch64 Options): Document -mtls-size.

 --
 Regards,
 Jiong


 +case AARCH64_CMODEL_TINY:
 +  /* The maximum TLS size allowed under tiny is 1M.  */
 +  if (aarch64_tls_size  20)
 + aarch64_tls_size = 20;

 The only valid values of aarch64_tls_size handled/expected by the
 remainder of the patch set are 12, 24, 32, 48 so setting the value to 20
 here doesn't make sense.

Thanks for pointing this out, how about the new patch attached?

2015-08-25  Jiong Wang  jiong.w...@arm.com

gcc/
  * config/aarch64/aarch64.opt (mtls-size): New entry.
  * config/aarch64/aarch64.c (initialize_aarch64_tls_size): New function.
  (aarch64_override_options_internal): Call initialize_aarch64_tls_size.
  * doc/invoke.texi (AArch64 Options): Document -mtls-size.

commit 36736a1a2133ffc949d3e00efdced8ef2c53cddd
Author: Jiong Wang jiong.w...@arm.com
Date:   Tue Aug 25 11:13:44 2015 +0100

1

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 382be2c..318b852 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7492,6 +7492,40 @@ aarch64_parse_one_override_token (const char* token,
   return;
 }
 
+/* A checking mechanism for the implementation of the tls size.  */
+
+static void
+initialize_aarch64_tls_size (struct gcc_options *opts)
+{
+  if (aarch64_tls_size == 0)
+aarch64_tls_size = 24;
+
+  switch (opts-x_aarch64_cmodel_var)
+{
+case AARCH64_CMODEL_TINY:
+  /* Both the default and maximum TLS size allowed under tiny is 1M which
+	 needs two instructions to address, so we clamp the size to 24.  */
+  if (aarch64_tls_size  24)
+	aarch64_tls_size = 24;
+  break;
+case AARCH64_CMODEL_SMALL:
+  /* The maximum TLS size allowed under small is 4G.  */
+  if (aarch64_tls_size  32)
+	aarch64_tls_size = 32;
+  break;
+case AARCH64_CMODEL_LARGE:
+  /* The maximum TLS size allowed under large is 16E.
+	 FIXME: 16E should be 64bit, we only support 48bit offset now.  */
+  if (aarch64_tls_size  48)
+	aarch64_tls_size = 48;
+  break;
+default:
+  gcc_unreachable ();
+}
+
+  return;
+}
+
 /* Parse STRING looking for options in the format:
  string	:: option:string
  option	:: name=substring
@@ -7584,6 +7618,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
 }
 
   initialize_aarch64_code_model (opts);
+  initialize_aarch64_tls_size (opts);
 
   aarch64_override_options_after_change_1 (opts);
 }
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 37c2c50..8642bdb 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -96,6 +96,25 @@ mtls-dialect=
 Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) Save
 Specify TLS dialect
 
+mtls-size=
+Target RejectNegative Joined Var(aarch64_tls_size) Enum(aarch64_tls_size)
+Specifies bit size of immediate TLS offsets.  Valid values are 12, 24, 32, 48.
+
+Enum
+Name(aarch64_tls_size) Type(int)
+
+EnumValue
+Enum(aarch64_tls_size) String(12) Value(12)
+
+EnumValue
+Enum(aarch64_tls_size) String(24) Value(24)
+
+EnumValue
+Enum(aarch64_tls_size) String(32) Value(32)
+
+EnumValue
+Enum(aarch64_tls_size) String(48) Value(48)
+
 march=
 Target RejectNegative ToLower Joined Var(aarch64_arch_string)
 -march=ARCH	Use features of architecture ARCH
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 27be317..f990bef 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -514,6 +514,7 @@ Objective-C and Objective-C++ Dialects}.
 -mstrict-align @gol
 -momit-leaf-frame-pointer  -mno-omit-leaf-frame-pointer @gol
 -mtls-dialect=desc  -mtls-dialect=traditional @gol
+-mtls-size=@var{size} @gol
 -mfix-cortex-a53-835769  -mno-fix-cortex-a53-835769 @gol
 -mfix-cortex-a53-843419  -mno-fix-cortex-a53-843419 @gol
 -march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}}
@@ -12409,6 +12410,11 @@ of TLS variables.  This is the default.
 Use traditional TLS as the thread-local storage mechanism for dynamic accesses
 of TLS variables.
 
+@item -mtls-size=@var{size}
+@opindex mtls-size
+Specify bit size of immediate TLS offsets.  Valid values are 12, 24, 32, 48.
+This option depends on binutils higher than 2.25.
+
 @item -mfix-cortex-a53-835769
 @itemx -mno-fix-cortex-a53-835769
 @opindex mfix-cortex-a53-835769


Re: Forwarding -foffload=[...] from the driver (compile-time) to libgomp (run-time)

2015-08-25 Thread Joseph Myers
On reviewing in more detail the changes to pass offloading targets
from the driver to libgomp at link time to identify the minimal
self-contained pieces that can go to trunk, I found that the use of
fnmatch to match against target names was completely unnecessary; the
ISO C90 functions strstr and strncmp could be used instead, so
avoiding the need to add configure tests for fnmatch.  This patch duly
removes the use of and configure tests for fnmatch.

Will commit to gomp-4_0-branch subject to test results.

2015-08-25  Joseph Myers  jos...@codesourcery.com

* plugin/configfrag.ac: Don't test for fnmatch.h or fnmatch.
* configure, config.h.in: Regenerate.
* target.c [PLUGIN_SUPPORT]: Don't include fnmatch.h.
(offload_target_to_plugin_name): Use strstr and strncmp instead of
fnmatch.

Index: libgomp/config.h.in
===
--- libgomp/config.h.in (revision 227169)
+++ libgomp/config.h.in (working copy)
@@ -24,12 +24,6 @@
 /* Define to 1 if you have the dlfcn.h header file. */
 #undef HAVE_DLFCN_H
 
-/* Define to 1 if you have the `fnmatch' function. */
-#undef HAVE_FNMATCH
-
-/* Define to 1 if you have the fnmatch.h header file. */
-#undef HAVE_FNMATCH_H
-
 /* Define to 1 if you have the `getloadavg' function. */
 #undef HAVE_GETLOADAVG
 
Index: libgomp/target.c
===
--- libgomp/target.c(revision 227169)
+++ libgomp/target.c(working copy)
@@ -41,7 +41,6 @@
 
 #ifdef PLUGIN_SUPPORT
 #include dlfcn.h
-#include fnmatch.h
 #include plugin-suffix.h
 #endif
 
@@ -1271,9 +1270,9 @@
 static const char *
 offload_target_to_plugin_name (const char *offload_target)
 {
-  if (fnmatch (*-intelmic*, offload_target, 0) == 0)
+  if (strstr (offload_target, -intelmic) != NULL)
 return intelmic;
-  if (fnmatch (nvptx*, offload_target, 0) == 0)
+  if (strncmp (offload_target, nvptx, 5) == 0)
 return nvptx;
   gomp_fatal (Unknown offload target: %s, offload_target);
 }
Index: libgomp/configure
===
--- libgomp/configure   (revision 227169)
+++ libgomp/configure   (working copy)
@@ -15119,33 +15119,6 @@
 offload_targets=
 
 plugin_support=yes
-for ac_header in fnmatch.h
-do :
-  ac_fn_c_check_header_mongrel $LINENO fnmatch.h ac_cv_header_fnmatch_h 
$ac_includes_default
-if test x$ac_cv_header_fnmatch_h = xyes; then :
-  cat confdefs.h _ACEOF
-#define HAVE_FNMATCH_H 1
-_ACEOF
-
-else
-  plugin_support=no
-fi
-
-done
-
-for ac_func in fnmatch
-do :
-  ac_fn_c_check_func $LINENO fnmatch ac_cv_func_fnmatch
-if test x$ac_cv_func_fnmatch = xyes; then :
-  cat confdefs.h _ACEOF
-#define HAVE_FNMATCH 1
-_ACEOF
-
-else
-  plugin_support=no
-fi
-done
-
 { $as_echo $as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl 5
 $as_echo_n checking for dlsym in -ldl...  6; }
 if test ${ac_cv_lib_dl_dlsym+set} = set; then :
Index: libgomp/plugin/configfrag.ac
===
--- libgomp/plugin/configfrag.ac(revision 227169)
+++ libgomp/plugin/configfrag.ac(working copy)
@@ -29,8 +29,6 @@
 offload_targets=
 AC_SUBST(offload_targets)
 plugin_support=yes
-AC_CHECK_HEADERS([fnmatch.h], , [plugin_support=no])
-AC_CHECK_FUNCS([fnmatch], , [plugin_support=no])
 AC_CHECK_LIB(dl, dlsym, , [plugin_support=no])
 if test x$plugin_support = xyes; then
   AC_DEFINE(PLUGIN_SUPPORT, 1,

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH, rs6000] Fix vec_shr define_expand

2015-08-25 Thread David Edelsohn
On Tue, Aug 25, 2015 at 10:14 AM, Pat Haugen
pthau...@linux.vnet.ibm.com wrote:
 The following patch fixes the vec_shr expander to do a shift instead of a
 rotate. CPU2006 benchmark 482.sphinx3 recently started failing due to this
 issue.  Bootstrapped and tested on ppc64/ppc64le with no new regressions. Ok
 for trunk? And ok for 4.9/5 (with equivalent change to vec_shl expander
 which exists in those releases) after bootstrap/regtest?

 -Pat


 2015-08-25  Pat Haugen  pthau...@us.ibm.com

 * config/rs6000/vector.md (vec_shr_mode): Fix to do a shift
 instead of a rotate.

 gcc/testsuite:
 * gcc.target/powerpc/vec-shr.c: New.

This is okay.

As Peter and I noticed

+  zero_reg = gen_reg_rtx(MODEmode);

This needs a space after gen_reg_rtx.

Thanks, David


Re: [testsuite] Clean up effective_target cache

2015-08-25 Thread Mike Stump
On Aug 25, 2015, at 1:14 AM, Christophe Lyon christophe.l...@linaro.org wrote:
 Some subsets of the tests override ALWAYS_CXXFLAGS or
 TEST_ALWAYS_FLAGS and perform effective_target support tests using
 these modified flags.

 This patch adds a new function 'clear_effective_target_cache', which
 is called at the end of every .exp file which overrides
 ALWAYS_CXXFLAGS or TEST_ALWAYS_FLAGS.

So, a simple English directive somewhere that says, if one changes 
ALWAYS_CXXFLAGS or TEST_ALWAYS_FLAGS then they should do a 
clear_effective_target_cache at the end as the target cache can make decisions 
based upon the flags, and those decisions need to be redone when the flags 
change would be nice.

I do wonder, do we need to reexamine when setting the flags?  I’m thinking of a 
sequence like: non-thumb default, is_thumb, set flags (thumb), is_thumb.  
Anyway, safe to punt this until someone discovers it or is reasonably sure it 
happens.

Anyway, all looks good.  Ok.

 However, I noticed that lib/g++.exp changes ALWAYS_CXXFLAGS, but does
 not appear to restore it. In doubt, I didn't change it.

Yeah, I examined it.  It seems like it might not matter, as anyone setting and 
unsetting would come in cleared, and if they didn’t, it should be roughly the 
same exact state, meaning, no clearing necessary.  I think it is safe to punt 
this until someone finds a bug or can see a way that it would matter.  I also 
don’t think it would hurt to clear, if someone wanted to refactor the code a 
bit and make the clearing and the cleanup a little more automatic.  I’m 
thinking of a RAII style code in which the dtor runs the clear.  Not sure if 
that is even possible in tcl.  [ checking ] Nope, maybe not.  Oh well.

Re: [PATCH 14/15][ARM/AArch64 Testsuite]Add test of vcvt{,_high}_i{f32_f16,f16_f32}

2015-08-25 Thread Alan Lawrence
Sorry - wrong version posted. The hunk for add_options_for_arm_neon_fp16 has
moved to the previous patch! This version also fixes some whitespace issues.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c: New.
* lib/target-supports.exp
(check_effective_target_arm_neon_fp16_hw_ok): New.
---
 .../aarch64/advsimd-intrinsics/vcvt_f16.c  | 98 ++
 gcc/testsuite/lib/target-supports.exp  | 15 
 2 files changed, 113 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
new file mode 100644
index 000..a2cfd38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
@@ -0,0 +1,98 @@
+/* { dg-require-effective-target arm_neon_fp16_hw_ok { target { arm*-*-* } } } 
*/
+#include arm_neon.h
+#include arm-neon-ref.h
+#include compute-ref-data.h
+#include math.h
+
+/* Expected results for vcvt.  */
+VECT_VAR_DECL (expected,hfloat,32,4) [] = { 0x4180, 0x4170,
+   0x4160, 0x4150 };
+VECT_VAR_DECL (expected,hfloat,16,4) [] = { 0x3e00, 0x4100, 0x4300, 0x4480 };
+
+/* Expected results for vcvt_high_f32_f16.  */
+VECT_VAR_DECL (expected_high,hfloat,32,4) [] = { 0xc140, 0xc130,
+0xc120, 0xc110 };
+/* Expected results for vcvt_high_f16_f32.  */
+VECT_VAR_DECL (expected_high,hfloat,16,8) [] = { 0x4000, 0x4000, 0x4000, 
0x4000,
+0xcc00, 0xcb80, 0xcb00, 0xca80 
};
+
+void
+exec_vcvt (void)
+{
+  clean_results ();
+
+#define TEST_MSG vcvt_f32_f16
+  {
+VECT_VAR_DECL (buffer_src, float, 16, 4) [] = { 16.0, 15.0, 14.0, 13.0 };
+
+DECL_VARIABLE (vector_src, float, 16, 4);
+
+VLOAD (vector_src, buffer_src, , float, f, 16, 4);
+DECL_VARIABLE (vector_res, float, 32, 4) =
+   vcvt_f32_f16 (VECT_VAR (vector_src, float, 16, 4));
+vst1q_f32 (VECT_VAR (result, float, 32, 4),
+  VECT_VAR (vector_res, float, 32, 4));
+
+CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, );
+  }
+#undef TEST_MSG
+
+  clean_results ();
+
+#define TEST_MSG vcvt_f16_f32
+  {
+VECT_VAR_DECL (buffer_src, float, 32, 4) [] = { 1.5, 2.5, 3.5, 4.5 };
+DECL_VARIABLE (vector_src, float, 32, 4);
+
+VLOAD (vector_src, buffer_src, q, float, f, 32, 4);
+DECL_VARIABLE (vector_res, float, 16, 4) =
+  vcvt_f16_f32 (VECT_VAR (vector_src, float, 32, 4));
+vst1_f16 (VECT_VAR (result, float, 16, 4),
+ VECT_VAR (vector_res, float, 16 ,4));
+
+CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected, );
+  }
+#undef TEST_MSG
+
+#if defined (__aarch64__)
+  clean_results ();
+
+#define TEST_MSG vcvt_high_f32_f16
+  {
+DECL_VARIABLE (vector_src, float, 16, 8);
+VLOAD (vector_src, buffer, q, float, f, 16, 8);
+DECL_VARIABLE (vector_res, float, 32, 4);
+VECT_VAR (vector_res, float, 32, 4) =
+  vcvt_high_f32_f16 (VECT_VAR (vector_src, float, 16, 8));
+vst1q_f32 (VECT_VAR (result, float, 32, 4),
+  VECT_VAR (vector_res, float, 32, 4));
+CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected_high, );
+  }
+#undef TEST_MSG
+  clean_results ();
+
+#define TEST_MSG vcvt_high_f16_f32
+  {
+DECL_VARIABLE (vector_low, float, 16, 4);
+VDUP (vector_low, , float, f, 16, 4, 2.0);
+
+DECL_VARIABLE (vector_src, float, 32, 4);
+VLOAD (vector_src, buffer, q, float, f, 32, 4);
+
+DECL_VARIABLE (vector_res, float, 16, 8) =
+  vcvt_high_f16_f32 (VECT_VAR (vector_low, float, 16, 4),
+VECT_VAR (vector_src, float, 32, 4));
+vst1q_f16 (VECT_VAR (result, float, 16, 8),
+  VECT_VAR (vector_res, float, 16, 8));
+
+CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_high, );
+  }
+#endif
+}
+
+int
+main (void)
+{
+  exec_vcvt ();
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 9aec02d..0a22c95 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2730,6 +2730,21 @@ proc check_effective_target_arm_neon_fp16_ok { } {
check_effective_target_arm_neon_fp16_ok_nocache]
 }
 
+proc check_effective_target_arm_neon_fp16_hw_ok { } {
+if {! [check_effective_target_arm_neon_fp16_ok] } {
+   return 0
+}
+global et_arm_neon_fp16_flags
+check_runtime_nocache arm_neon_fp16_hw_ok {
+   int
+   main (int argc, char **argv)
+   {
+ asm (vcvt.f32.f16 q1, d0);
+ return 0;
+   }
+} $et_arm_neon_fp16_flags
+}
+
 proc add_options_for_arm_neon_fp16 { flags } {
 if { ! [check_effective_target_arm_neon_fp16_ok] } {
return $flags
-- 
1.8.3



[Bug middle-end/67351] Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread linux at carewolf dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

--- Comment #1 from Allan Jensen linux at carewolf dot com ---
Created attachment 36254
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=36254&action=edit
Compiled test assembler


[hsa] Fix omp declare target support

2015-08-25 Thread Martin Jambor
Hi,

it was brought to my attention that omp declare target functions were
not properly translated to HSA functions.  Until the grand scheme with
an IPA pass is complete, this will do.  And having a single predicate
to decide what should be an HSA function cannot be bad.  Committed to
the hsa branch.

Thanks,

Martin


2015-08-25  Martin Jambor  mjam...@suse.cz

* hsa.h (hsa_callable_function_p): Declare.
* hsa.c (hsa_callable_function_p): New function.
* hsa-gen.c (gen_hsa_insns_for_call): Use it.
(pass_gen_hsail::execute): Likewise.
---
 gcc/ChangeLog.hsa | 7 +++
 gcc/hsa-gen.c | 5 ++---
 gcc/hsa.c | 9 +
 gcc/hsa.h | 1 +
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog.hsa b/gcc/ChangeLog.hsa
index 4ad8414..922c917 100644
--- a/gcc/ChangeLog.hsa
+++ b/gcc/ChangeLog.hsa
@@ -1,5 +1,12 @@
 2015-08-25  Martin Jambor  mjam...@suse.cz
 
+   * hsa.h (hsa_callable_function_p): Declare.
+   * hsa.c (hsa_callable_function_p): New function.
+   * hsa-gen.c (gen_hsa_insns_for_call): Use it.
+   (pass_gen_hsail::execute): Likewise.
+
+2015-08-25  Martin Jambor  mjam...@suse.cz
+
* hsa-gen.c (gen_hsa_unaryop_for_builtin): New function.
 
 2015-08-25  Martin Jambor  mjam...@suse.cz
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 1e23996..7190dce 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -3255,7 +3255,7 @@ gen_hsa_insns_for_call (gimple stmt, hsa_bb *hbb,
  return;
}
 
-  if (lookup_attribute (hsafunc, DECL_ATTRIBUTES (function_decl)))
+  if (hsa_callable_function_p (function_decl))
 gen_hsa_insns_for_direct_call (stmt, hbb, ssa_map);
   else if (!gen_hsa_insns_for_known_library_call (stmt, hbb, ssa_map))
sorry (HSA does support only call for functions with 'hsafunc' 
@@ -4102,8 +4102,7 @@ pass_gen_hsail::execute (function *)
   || lookup_attribute (hsakernel,
   DECL_ATTRIBUTES (current_function_decl)))
 return generate_hsa (true);
-  else if (lookup_attribute (hsafunc,
-DECL_ATTRIBUTES (current_function_decl)))
+  else if (hsa_callable_function_p (current_function_decl))
 return generate_hsa (false);
   else
 return wrap_all_hsa_calls ();
diff --git a/gcc/hsa.c b/gcc/hsa.c
index 13a2ace..4ad44fe 100644
--- a/gcc/hsa.c
+++ b/gcc/hsa.c
@@ -103,6 +103,15 @@ hash_table hsa_free_symbol_hasher 
*hsa_global_variable_symbols;
 /* True if compilation unit-wide data are already allocated and initialized.  
*/
 static bool compilation_unit_data_initialized;
 
+/* Return true if FNDECL represents an HSA-callable function.  */
+
+bool
+hsa_callable_function_p (tree fndecl)
+{
+  return lookup_attribute (hsafunc, DECL_ATTRIBUTES (fndecl))
+|| lookup_attribute (omp declare target, DECL_ATTRIBUTES (fndecl));
+}
+
 /* Allocate HSA structures that are are used when dealing with different
functions.  */
 
diff --git a/gcc/hsa.h b/gcc/hsa.h
index 3956676..f9bcc80 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -898,6 +898,7 @@ extern struct hsa_function_representation *hsa_cfun;
 extern hash_table hsa_free_symbol_hasher *hsa_global_variable_symbols;
 extern hash_map tree, vec char * * *hsa_decl_kernel_dependencies;
 extern unsigned hsa_kernel_calls_counter;
+bool hsa_callable_function_p (tree fndecl);
 void hsa_init_compilation_unit_data (void);
 void hsa_deinit_compilation_unit_data (void);
 bool hsa_machine_large_p (void);
-- 
2.4.6



[Bug middle-end/67330] ICE handling weak attributes

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67330

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
   Assignee|unassigned at gcc dot gnu.org  |mpolacek at gcc dot 
gnu.org
   Target Milestone|--- |6.0

--- Comment #5 from Marek Polacek mpolacek at gcc dot gnu.org ---
Let me see what others think.


[Bug tree-optimization/66372] [6 Regression] ICE on valid code at -O3 on x86_64-linux-gnu

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66372

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #6 from Marek Polacek mpolacek at gcc dot gnu.org ---
This seems to be fixed now.


[nvptx] fix thinko

2015-08-25 Thread Nathan Sidwell
I've committed this.  A thinko regarding what asm_operands wants for an 
argument.  Fixes a couple of tests.


nathan
2015-08-25  Nathan Sidwell  nat...@acm.org

	* config/nvptx/nvptx.c (nvptx_write_function_decl): Reformat.
	(nvptx_reorg_subreg): Pass insn pattern to asm_operands.

Index: gcc/config/nvptx/nvptx.c
===
--- gcc/config/nvptx/nvptx.c	(revision 227128)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -321,7 +321,8 @@ nvptx_write_function_decl (std::stringst
 
   /* Declare argument types.  */
   if ((args != NULL_TREE
-!(TREE_CODE (args) == TREE_LIST  TREE_VALUE (args) == void_type_node))
+!(TREE_CODE (args) == TREE_LIST
+	 TREE_VALUE (args) == void_type_node))
   || is_main
   || return_in_mem
   || DECL_STATIC_CHAIN (decl))
@@ -1917,7 +1918,7 @@ nvptx_reorg_subreg (void)
 {
   next = NEXT_INSN (insn);
   if (!NONDEBUG_INSN_P (insn)
-	  || asm_noperands (insn) = 0
+	  || asm_noperands (PATTERN (insn)) = 0
 	  || GET_CODE (PATTERN (insn)) == USE
 	  || GET_CODE (PATTERN (insn)) == CLOBBER)
 	continue;


[PATCH, rs6000] Fix vec_shr define_expand

2015-08-25 Thread Pat Haugen
The following patch fixes the vec_shr expander to do a shift instead of 
a rotate. CPU2006 benchmark 482.sphinx3 recently started failing due to 
this issue.  Bootstrapped and tested on ppc64/ppc64le with no new 
regressions. Ok for trunk? And ok for 4.9/5 (with equivalent change to 
vec_shl expander which exists in those releases) after bootstrap/regtest?


-Pat


2015-08-25  Pat Haugen  pthau...@us.ibm.com

* config/rs6000/vector.md (vec_shr_mode): Fix to do a shift
instead of a rotate.


gcc/testsuite:
* gcc.target/powerpc/vec-shr.c: New.



Index: gcc/config/rs6000/vector.md
===
--- gcc/config/rs6000/vector.md	(revision 227041)
+++ gcc/config/rs6000/vector.md	(working copy)
@@ -977,6 +977,8 @@ (define_expand movmisalignmode
 ;; General shift amounts can be supported using vsro + vsr. We're
 ;; not expecting to see these yet (the vectorizer currently
 ;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as 
+;; 'shift toward element 0' so is a shr for LE and shl for BE.
 (define_expand vec_shr_mode
   [(match_operand:VEC_L 0 vlogical_operand )
(match_operand:VEC_L 1 vlogical_operand )
@@ -987,6 +989,7 @@ (define_expand vec_shr_mode
   rtx bitshift = operands[2];
   rtx shift;
   rtx insn;
+  rtx zero_reg, op1, op2;
   HOST_WIDE_INT bitshift_val;
   HOST_WIDE_INT byteshift_val;
 
@@ -996,19 +999,29 @@ (define_expand vec_shr_mode
   if (bitshift_val  0x7)
 FAIL;
   byteshift_val = (bitshift_val  3);
+  zero_reg = gen_reg_rtx(MODEmode);
+  emit_move_insn (zero_reg, CONST0_RTX (MODEmode));
   if (!BYTES_BIG_ENDIAN)
-byteshift_val = 16 - byteshift_val;
+{
+  byteshift_val = 16 - byteshift_val;
+  op1 = zero_reg;
+  op2 = operands[1];
+}
+  else
+{
+  op1 = operands[1];
+  op2 = zero_reg;
+}
+
   if (TARGET_VSX  (byteshift_val  0x3) == 0)
 {
   shift = gen_rtx_CONST_INT (QImode, byteshift_val  2);
-  insn = gen_vsx_xxsldwi_mode (operands[0], operands[1], operands[1],
- shift);
+  insn = gen_vsx_xxsldwi_mode (operands[0], op1, op2, shift);
 }
   else
 {
   shift = gen_rtx_CONST_INT (QImode, byteshift_val);
-  insn = gen_altivec_vsldoi_mode (operands[0], operands[1], operands[1],
-	shift);
+  insn = gen_altivec_vsldoi_mode (operands[0], op1, op2, shift);
 }
 
   emit_insn (insn);
Index: gcc/testsuite/gcc.target/powerpc/vec-shr.c
===
--- gcc/testsuite/gcc.target/powerpc/vec-shr.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-shr.c	(working copy)
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options -O3 -fno-inline } */
+
+#include stdlib.h
+
+typedef struct { double r, i; } complex;
+#define LEN 30
+complex c[LEN];
+double d[LEN];
+
+void
+foo (complex *c, double *d, int len1)
+{
+  int i;
+  for (i = 0; i  len1; i++)
+{
+  c[i].r = d[i];
+  c[i].i = 0.0;
+}
+}
+
+int
+main (void)
+{
+  int i;
+  for (i = 0; i  LEN; i++)
+d[i] = (double) i;
+  foo (c, d, LEN);
+  for (i=0;iLEN;i++)
+if ((c[i].r != (double) i) || (c[i].i != 0.0))
+  abort ();
+  return 0;
+}
+


Re: [Patch] Add to the libgfortran/newlib bodge to detect ftruncate support in ARM/AArch64/SH

2015-08-25 Thread James Greenhalgh

On Fri, Aug 21, 2015 at 11:05:47AM +0100, James Greenhalgh wrote:
 On Thu, Aug 20, 2015 at 10:50:47AM +0100, Marcus Shawcroft wrote:
  On 20 August 2015 at 09:31, James Greenhalgh james.greenha...@arm.com 
  wrote:
  
   Hi,
  
   Steve's patch in 2013 [1] to fix the MIPS newlib/libgfortran build
   causes subtle issues for an ARM/AArch64 newlib/libgfortran build. The
   problem is that ARM/AArch64 (and SH) define a stub function for
   ftruncate, which we would previously have auto-detected, but which is not
   part of the hardwiring Steve added.
  
   Continuing the tradition of building bodge on bodge on bodge, this patch
   hardwires HAVE_FTRUNCATE on for ARM/AArch64/SH, which does fix the issue
   I was seeing.
 
  This is the second breakage I'm aware of due to the introduction of
  this hardwire code, the first being related to strtold.  My
  recollection is that it is only the mips target that requires the
  newlib API hardwiring. Ideally we should rely only on the
  AC_CHECK_FUNCS_ONCE probe code and avoid the hardwire entirely.
 
  Perhaps a better approach for trunk would be something along the lines of:
 
  case ${host}--x${with_newlib} in
  mips*--xyes)
hardwire_newlib=1;;
  esac
  if test ${hardwire_newlib:-0} -eq 1; then
... existing AC_DEFINES hardwire code
  else
... existing AC_CHECK_FUNCS_ONCE probe code
  fi
 
  In effect limiting the hardwire to just the target which is unable to
  probe.  For backport to 4.9 and 5 I think James' more conservative
  patch is probably more appropriate.
 
  What do folks think?

 (+CC fort...@gcc.gnu.org - who I should have CCed from the start).

 This runs in to issues with a newlib build [1] (newlib provides a 'kill'
 symbol for linking, but does not provide a declaration in signal.h, so
 we take a -Werror=implicit-function-declaration).

This is what the patch you suggested would look like.

I've sent a patch to the newlib list [1] which unconditionally declares
'kill'. With that in place, we can then autodetect the presence of the
functions newlib provides. I'd expect that you would need to apply that
newlib patch if you were testing this patch locally.

I've tested this with a build of arm-none-eabi and aarch64-none-elf to
check that I now get HAVE_FTRUNCATE defined, and that the build completes.

OK?

Thanks,
James

---
2015-08-25  James Greenhalgh  james.greenha...@arm.com

* configure.ac: Auto-detect newlib function support unless we
know there are issues when configuring for a host.
* configure: Regenerate.

---
[1]: https://sourceware.org/ml/newlib/2015/msg00632.html

diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 35a8b39..1e9914c 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -273,8 +273,13 @@ GCC_HEADER_STDINT(gstdint.h)
 
 AC_CHECK_MEMBERS([struct stat.st_blksize, struct stat.st_blocks, struct stat.st_rdev])
 
+case ${host}--x${with_newlib} in
+  mips*--xyes)
+hardwire_newlib=1;;
+esac
+
 # Check for library functions.
-if test x${with_newlib} = xyes; then
+if test ${hardwire_newlib:-0} -eq 1; then
# We are being configured with a cross compiler.  AC_REPLACE_FUNCS
# may not work correctly, because the compiler may not be able to
# link executables.


Re: [Patch] Add to the libgfortran/newlib bodge to detect ftruncate support in ARM/AArch64/SH

2015-08-25 Thread FX
 2015-08-25  James Greenhalgh  james.greenha...@arm.com
 
   * configure.ac: Auto-detect newlib function support unless we
   know there are issues when configuring for a host.
   * configure: Regenerate.

Thanks for CC’ing the fortran list.

Given that this is newlib-specific code, even though it’s in libgfortran 
configury, you should decide and commit what’s best. I don’t think we have any 
newlib expert in the Fortran maintainers.

Wait for 48 hours to see if anyone else objects, though.

Cheers,
FX

[Bug middle-end/67005] [5/6 Regression] ICE: in verify_loop_structure, at cfgloop.c:1647 (loop with header n not in loop tree)

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67005

--- Comment #7 from Marek Polacek mpolacek at gcc dot gnu.org ---
(In reply to Marek Polacek from comment #6)
 Hm, adding || (e2->flags & EDGE_IRREDUCIBLE_LOOP) doesn't work; the E2 edge
 doesn't have the EDGE_IRREDUCIBLE_LOOP flag, even if I recompute that flag
 via mark_irreducible_loops.  So maybe set LOOPS_NEED_FIXUP every time we
 remove an edge?

I did some measurements on a GCC regtest.  The edge removal was triggered
~13000 times, but for only ~4000 out of that was loop_exit_edge_p
(bb->loop_father, e) true -- setting LOOPS_NEED_FIXUP unconditionally would be
maybe too expensive thus.


Re: [PATCH 1/2] driver: support state cleanup

2015-08-25 Thread Joseph Myers
On Thu, 6 Aug 2015, David Malcolm wrote:

 gcc/ChangeLog:
   * gcc-main.c (main): Add params to driver ctor.
   * gcc.c (class env_manager): New.
   (env): New global.
   (env_manager::init): New.
   (env_manager::get): New.
   (env_manager::xput): New.
   (env_manager::restore): New.
   Poison getenv and putenv.
   (DEFAULT_TARGET_SYSTEM_ROOT): New.
   (target_system_root): Update initialization to use
   DEFAULT_TARGET_SYSTEM_ROOT.
   (struct spec_list): Add field default_ptr.
   (INIT_STATIC_SPEC): Initialize new field default_ptr.
   (init_spec): Likewise.
   (set_spec): Clear field default_ptr.
   (read_specs): Free spec and buffer.
   (xputenv): Reimplement in terms of env_manager.
   (process_command): Replace ::getenv calls with calls to the
   env_manager singleton.
   (process_brace_body): Free string in three places.
   (driver::driver): New.
   (driver::~driver): New.
   (used_arg): Convert from a function to...
   (class used_arg_t): ...this class, and...
   (used_arg): ...this new global instance.
   (used_arg_t::finalize): New function.
   (getenv_spec_function): Add const to local value.  Replace
   ::getenv call with call to the env_manager singleton.
   (path_prefix_reset): New function.
   (driver::finalize): New function.
   * gcc.h (driver::driver): New.
   (driver::~driver): New.
   (driver::finalize): New.

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Indirect jumps

2015-08-25 Thread Nathan Sidwell
Ptx is one of those rare (unique?) machines that doesn't have an indirect 
branch.  optabs is prepared for such a target and emits a sorry when an 
indirect branch is needed.  However it then goes on to try to emit such an 
instruction and ends up ICEing.


Fixed thusly, ok?  (Or is the right solution to define a dummy indirect branch 
in the PTX  md file?)


nathan
2015-08-25  Nathan Sidwell  nat...@acm.org

	* optabs.c (emit_indirect_jump): Don't try to emit a jump if the
	target doesn't have one.

Index: gcc/optabs.c
===
--- gcc/optabs.c	(revision 227128)
+++ gcc/optabs.c	(working copy)
@@ -4488,11 +4488,13 @@ emit_indirect_jump (rtx loc)
 {
   if (!targetm.have_indirect_jump ())
 sorry (indirect jumps are not available on this target);
-
-  struct expand_operand ops[1];
-  create_address_operand (ops[0], loc);
-  expand_jump_insn (targetm.code_for_indirect_jump, 1, ops);
-  emit_barrier ();
+  else
+{
+  struct expand_operand ops[1];
+  create_address_operand (ops[0], loc);
+  expand_jump_insn (targetm.code_for_indirect_jump, 1, ops);
+  emit_barrier ();
+}
 }
 
 


Re: [PATCH 14/15][ARM/AArch64 Testsuite]Add test of vcvt{,_high}_i{f32_f16,f16_f32}

2015-08-25 Thread Christophe Lyon
On 25 August 2015 at 15:57, Alan Lawrence alan.lawre...@arm.com wrote:
 Sorry - wrong version posted. The hunk for add_options_for_arm_neon_fp16 has
 moved to the previous patch! This version also fixes some whitespace issues.


This looks OK to me now, thanks.

 gcc/testsuite/ChangeLog:

 * gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c: New.
 * lib/target-supports.exp
 (check_effective_target_arm_neon_fp16_hw_ok): New.
 ---
  .../aarch64/advsimd-intrinsics/vcvt_f16.c  | 98 
 ++
  gcc/testsuite/lib/target-supports.exp  | 15 
  2 files changed, 113 insertions(+)
  create mode 100644 
 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c

 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c 
 b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
 new file mode 100644
 index 000..a2cfd38
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c
 @@ -0,0 +1,98 @@
 +/* { dg-require-effective-target arm_neon_fp16_hw_ok { target { arm*-*-* } } 
 } */
 +#include arm_neon.h
 +#include arm-neon-ref.h
 +#include compute-ref-data.h
 +#include math.h
 +
 +/* Expected results for vcvt.  */
 +VECT_VAR_DECL (expected,hfloat,32,4) [] = { 0x4180, 0x4170,
 +   0x4160, 0x4150 };
 +VECT_VAR_DECL (expected,hfloat,16,4) [] = { 0x3e00, 0x4100, 0x4300, 0x4480 };
 +
 +/* Expected results for vcvt_high_f32_f16.  */
 +VECT_VAR_DECL (expected_high,hfloat,32,4) [] = { 0xc140, 0xc130,
 +0xc120, 0xc110 };
 +/* Expected results for vcvt_high_f16_f32.  */
 +VECT_VAR_DECL (expected_high,hfloat,16,8) [] = { 0x4000, 0x4000, 0x4000, 
 0x4000,
 +0xcc00, 0xcb80, 0xcb00, 
 0xca80 };
 +
 +void
 +exec_vcvt (void)
 +{
 +  clean_results ();
 +
 +#define TEST_MSG vcvt_f32_f16
 +  {
 +VECT_VAR_DECL (buffer_src, float, 16, 4) [] = { 16.0, 15.0, 14.0, 13.0 };
 +
 +DECL_VARIABLE (vector_src, float, 16, 4);
 +
 +VLOAD (vector_src, buffer_src, , float, f, 16, 4);
 +DECL_VARIABLE (vector_res, float, 32, 4) =
 +   vcvt_f32_f16 (VECT_VAR (vector_src, float, 16, 4));
 +vst1q_f32 (VECT_VAR (result, float, 32, 4),
 +  VECT_VAR (vector_res, float, 32, 4));
 +
 +CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, );
 +  }
 +#undef TEST_MSG
 +
 +  clean_results ();
 +
 +#define TEST_MSG vcvt_f16_f32
 +  {
 +VECT_VAR_DECL (buffer_src, float, 32, 4) [] = { 1.5, 2.5, 3.5, 4.5 };
 +DECL_VARIABLE (vector_src, float, 32, 4);
 +
 +VLOAD (vector_src, buffer_src, q, float, f, 32, 4);
 +DECL_VARIABLE (vector_res, float, 16, 4) =
 +  vcvt_f16_f32 (VECT_VAR (vector_src, float, 32, 4));
 +vst1_f16 (VECT_VAR (result, float, 16, 4),
 + VECT_VAR (vector_res, float, 16 ,4));
 +
 +CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected, );
 +  }
 +#undef TEST_MSG
 +
 +#if defined (__aarch64__)
 +  clean_results ();
 +
 +#define TEST_MSG vcvt_high_f32_f16
 +  {
 +DECL_VARIABLE (vector_src, float, 16, 8);
 +VLOAD (vector_src, buffer, q, float, f, 16, 8);
 +DECL_VARIABLE (vector_res, float, 32, 4);
 +VECT_VAR (vector_res, float, 32, 4) =
 +  vcvt_high_f32_f16 (VECT_VAR (vector_src, float, 16, 8));
 +vst1q_f32 (VECT_VAR (result, float, 32, 4),
 +  VECT_VAR (vector_res, float, 32, 4));
 +CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected_high, );
 +  }
 +#undef TEST_MSG
 +  clean_results ();
 +
 +#define TEST_MSG vcvt_high_f16_f32
 +  {
 +DECL_VARIABLE (vector_low, float, 16, 4);
 +VDUP (vector_low, , float, f, 16, 4, 2.0);
 +
 +DECL_VARIABLE (vector_src, float, 32, 4);
 +VLOAD (vector_src, buffer, q, float, f, 32, 4);
 +
 +DECL_VARIABLE (vector_res, float, 16, 8) =
 +  vcvt_high_f16_f32 (VECT_VAR (vector_low, float, 16, 4),
 +VECT_VAR (vector_src, float, 32, 4));
 +vst1q_f16 (VECT_VAR (result, float, 16, 8),
 +  VECT_VAR (vector_res, float, 16, 8));
 +
 +CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_high, );
 +  }
 +#endif
 +}
 +
 +int
 +main (void)
 +{
 +  exec_vcvt ();
 +  return 0;
 +}
 diff --git a/gcc/testsuite/lib/target-supports.exp 
 b/gcc/testsuite/lib/target-supports.exp
 index 9aec02d..0a22c95 100644
 --- a/gcc/testsuite/lib/target-supports.exp
 +++ b/gcc/testsuite/lib/target-supports.exp
 @@ -2730,6 +2730,21 @@ proc check_effective_target_arm_neon_fp16_ok { } {
 check_effective_target_arm_neon_fp16_ok_nocache]
  }

 +proc check_effective_target_arm_neon_fp16_hw_ok { } {
 +if {! [check_effective_target_arm_neon_fp16_ok] } {
 +   return 0
 +}
 +global et_arm_neon_fp16_flags
 +check_runtime_nocache arm_neon_fp16_hw_ok {
 +   int
 +   main (int argc, char **argv)
 +   {
 + asm (vcvt.f32.f16 q1, d0);
 + return 0;
 +   }

PING: PATCH: Mention --enable-default-pie in gcc-6/changes.html

2015-08-25 Thread H.J. Lu
On Thu, May 28, 2015 at 6:49 AM, H.J. Lu hjl.to...@gmail.com wrote:
 OK to install?

 H.J.
 ---
 Index: gcc-6/changes.html
 ===
 RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
 retrieving revision 1.10
 diff -u -p -r1.10 changes.html
 --- gcc-6/changes.html  26 May 2015 10:12:08 -  1.10
 +++ gcc-6/changes.html  28 May 2015 13:49:00 -
 @@ -140,8 +140,12 @@ enum {


  !-- .. --
 -!-- h2Other significant improvements/h2 --
 +h2Other significant improvements/h2

 +  ul
 +liAdded code--enable-default-pie/code configure option to
 +   generate PIE by default./li
 +  /ul

  /body
  /html

PING.

-- 
H.J.


[Bug target/67272] [HSA] register allocator expects that every register must be assigned

2015-08-25 Thread matz at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67272

--- Comment #5 from Michael Matz matz at gcc dot gnu.org ---
Author: matz
Date: Tue Aug 25 16:02:38 2015
New Revision: 227176

URL: https://gcc.gnu.org/viewcvs?rev=227176root=gccview=rev
Log:
PR target/67272
PR target/67296
* hsa.h (hsa_function_representation): Remove prologue member.
* hsa-dump.c (dump_hsa_cfun): Iterate over all BBs.
* hsa-gen.c (hsa_function_representation): Don't init prologue,
start with zero bbs.
(hsa_function_representation::get_shadow_reg): Use entry block,
not prologue member.
(hsa_init_data_for_cfun): Create hsa_bb for entry/exit blocks.
(hsa_deinit_data_for_cfun): Deallocate also for entry/exit blocks.
(gen_hsa_insns_for_kernel_call): Don't use UINT64_MAX, but
the built-in max value of the type.
(gen_function_def_parameters): Don't use prologue member, but
the hsa bb for entry bb.
(wrap_all_hsa_calls): Iterate over all BBs.
* hsa-regalloc.c (naive_process_phi): Revert 2015-08-19 change.
(naive_outof_ssa): Iterate over all BBs.
(dump_hsa_cfun_regalloc): Ditto.
(linear_scan_regalloc): Ditto.
(regalloc): Ditto.

Modified:
branches/hsa/gcc/ChangeLog.hsa
branches/hsa/gcc/hsa-dump.c
branches/hsa/gcc/hsa-gen.c
branches/hsa/gcc/hsa-regalloc.c
branches/hsa/gcc/hsa.h


[Bug target/67296] [HSA] ICE in register allocator (assignment of this argument in a ctor)

2015-08-25 Thread matz at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67296

--- Comment #1 from Michael Matz matz at gcc dot gnu.org ---
Author: matz
Date: Tue Aug 25 16:02:38 2015
New Revision: 227176

URL: https://gcc.gnu.org/viewcvs?rev=227176root=gccview=rev
Log:
PR target/67272
PR target/67296
* hsa.h (hsa_function_representation): Remove prologue member.
* hsa-dump.c (dump_hsa_cfun): Iterate over all BBs.
* hsa-gen.c (hsa_function_representation): Don't init prologue,
start with zero bbs.
(hsa_function_representation::get_shadow_reg): Use entry block,
not prologue member.
(hsa_init_data_for_cfun): Create hsa_bb for entry/exit blocks.
(hsa_deinit_data_for_cfun): Deallocate also for entry/exit blocks.
(gen_hsa_insns_for_kernel_call): Don't use UINT64_MAX, but
the built-in max value of the type.
(gen_function_def_parameters): Don't use prologue member, but
the hsa bb for entry bb.
(wrap_all_hsa_calls): Iterate over all BBs.
* hsa-regalloc.c (naive_process_phi): Revert 2015-08-19 change.
(naive_outof_ssa): Iterate over all BBs.
(dump_hsa_cfun_regalloc): Ditto.
(linear_scan_regalloc): Ditto.
(regalloc): Ditto.

Modified:
branches/hsa/gcc/ChangeLog.hsa
branches/hsa/gcc/hsa-dump.c
branches/hsa/gcc/hsa-gen.c
branches/hsa/gcc/hsa-regalloc.c
branches/hsa/gcc/hsa.h


[libgfortran,committed] Fix default SIGN mode on preconnected/internal units

2015-08-25 Thread FX
Preconnected and internal units currently have their sign mode set to 
SIGN_SUPPRESS, rather than the logical value of SIGN_UNSPECIFIED. This does not 
matter in most cases, since our chosen processor-dependent behavior is to 
suppress optional plus signs anyway…

… except when one tries to override the default behavior with environment 
variable GFORTRAN_OPTIONAL_PLUS, which is thus currently broken on internal and 
preconnected units. Take the following code:

  character(len=20) :: s
  print *, 42.
  write(s,(G0)) 42.
  print *, s
  end

without the patch, run with “GFORTRAN_OPTIONAL_PLUS=y”, it will still output:

   42.000
 42.000  

while with the patch, it will now correctly output (with 
GFORTRAN_OPTIONAL_PLUS=y):

  +42.000
 +42.000 


I regtested on x86_64-apple-darwin15, and committed as trivial.
(I couldn’t come up with a way to figure out how to test that in the testsuite, 
though.)

FX





sign.ChangeLog
Description: Binary data


sign.diff
Description: Binary data


Re: [libgfortran,patch] Remove never-used debugging code

2015-08-25 Thread Steve Kargl
On Tue, Aug 25, 2015 at 06:17:13PM +0200, FX wrote:
 Turns out I missed some of the dead code. And I now also fixed comments and 
 some formatting.
 libgfortran/runtime/environ.c is now much more readable than before.
 The patch is still a no-op, in terms of user functionality.
 OK to commit to trunk?
 

Certainly, the dead code can go.  But, is this changing
the library ABI?

troutmask:fvwm:kargl[764] nm /mnt/sgk/work/6/lib/libgfortran.a | grep show_
 T _gfortrani_show_variables
 t show_boolean
 t show_integer
 t show_sep
 t show_string
 T _gfortrani_show_locus




-- 
Steve


Re: [libgfortran,patch] Remove never-used debugging code

2015-08-25 Thread FX
 Certainly, the dead code can go.  But,is this changing the library ABI?
 
 troutmask:fvwm:kargl[764] nm /mnt/sgk/work/6/lib/libgfortran.a | grep show_
  T _gfortrani_show_variables
  t show_boolean
  t show_integer
  t show_sep
  t show_string
  T _gfortrani_show_locus

Nope, none of those functions are actually publicly exported. They are not in 
gfortran.map, being either static, or having _gfortrani_ prefix which means 
internal libgfortran use.

FX

[gomp4] optimize routine calling

2015-08-25 Thread Nathan Sidwell
When forking to call a partitioned routine there is no need to propagate local 
state from the active thread to the forked threads.


I've committed this patch to implement that optimization.

nathan
2015-08-25  Nathan Sidwell  nat...@codesourcery.com

	* config/nvptx/nvptx.c (nvptx_emit_forking): Add is_call argument,
	propagate it into mask.
	(nvptx_emit_joining): Likewise.
	(nvptx_expand_call): Move emit_forking call to later.  Add is_call
	argument.
	(nvptx_expand_oacc_fork, nvptx_expand_oacc_join): Adjust calls.
	(nvptx_discover_pars): Don't look for predecessor insn in call
	forks and joins.
	(nvptx_process_pars): Don't emit propagation code for a call.

Index: gcc/config/nvptx/nvptx.c
===
--- gcc/config/nvptx/nvptx.c	(revision 227159)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -1047,16 +1047,16 @@ nvptx_expand_compare (rtx compare)
 /* Emit forking instructions for MASK.  */
 
 static void
-nvptx_emit_forking (unsigned mask)
+nvptx_emit_forking (unsigned mask, bool is_call)
 {
   mask = (GOMP_DIM_MASK (GOMP_DIM_WORKER)
 	   | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
   if (mask)
 {
-  rtx op = GEN_INT (mask);
+  rtx op = GEN_INT (mask | (is_call  GOMP_DIM_MAX));
   
   /* Emit fork for worker level.  */
-  if (mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
+  if (!is_call  mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
 	emit_insn (gen_nvptx_fork (op));
   emit_insn (gen_nvptx_forked (op));
 }
@@ -1065,16 +1065,19 @@ nvptx_emit_forking (unsigned mask)
 /* Emit joining instructions for MASK.  */
 
 static void
-nvptx_emit_joining (unsigned mask)
+nvptx_emit_joining (unsigned mask, bool is_call)
 {
   mask = (GOMP_DIM_MASK (GOMP_DIM_WORKER)
 	   | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
   if (mask)
 {
-  rtx op = GEN_INT (mask);
+  rtx op = GEN_INT (mask | (is_call  GOMP_DIM_MAX));
 
-  /* Emit joining for all pars.  */
-  emit_insn (gen_nvptx_joining (op));
+  /* Emit joining for all non-call pars to ensure there's a single
+	 predecessor for the block the join insn ends up in.  This is
+	 needed for skipping entire loops.  */
+  if (!is_call)
+	emit_insn (gen_nvptx_joining (op));
   emit_insn (gen_nvptx_join (op));
 }
 }
@@ -1135,8 +1138,6 @@ nvptx_expand_call (rtx retval, rtx addre
 	}
 }
 
-  nvptx_emit_forking (parallel);
-
   if (cfun-machine-funtype
   /* It's possible to construct testcases where we call a variable.
 	 See compile/20020129-1.c.  stdarg_p will crash so avoid calling it
@@ -1195,11 +1196,12 @@ nvptx_expand_call (rtx retval, rtx addre
 	  write_func_decl_from_insn (func_decls, retval, pat, callee);
 	}
 }
+  nvptx_emit_forking (parallel, true);
   emit_call_insn (pat);
   if (tmp_retval != retval)
 emit_move_insn (retval, tmp_retval);
 
-  nvptx_emit_joining (parallel);
+  nvptx_emit_joining (parallel, true);
 }
 
 /* Expand the oacc fork  join primitive into ptx-required unspecs.  */
@@ -1207,13 +1209,13 @@ nvptx_expand_call (rtx retval, rtx addre
 void
 nvptx_expand_oacc_fork (unsigned mode)
 {
-  nvptx_emit_forking (GOMP_DIM_MASK (mode));
+  nvptx_emit_forking (GOMP_DIM_MASK (mode), false);
 }
 
 void
 nvptx_expand_oacc_join (unsigned mode)
 {
-  nvptx_emit_joining (GOMP_DIM_MASK (mode));
+  nvptx_emit_joining (GOMP_DIM_MASK (mode), false);
 }
 
 /* Expander for reduction locking and unlocking.  We expect SRC to be
@@ -2611,7 +2613,8 @@ nvptx_discover_pars (bb_insn_map_t *map)
 		l = new parallel (l, mask);
 		l-forked_block = block;
 		l-forked_insn = end;
-		if (mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
+		if (!(mask  GOMP_DIM_MASK (GOMP_DIM_MAX))
+		 (mask  GOMP_DIM_MASK (GOMP_DIM_WORKER)))
 		  l-fork_insn
 		= nvptx_discover_pre (block, CODE_FOR_nvptx_fork);
 	  }
@@ -2626,7 +2629,8 @@ nvptx_discover_pars (bb_insn_map_t *map)
 		gcc_assert (l-mask == mask);
 		l-join_block = block;
 		l-join_insn = end;
-		if (mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
+		if (!(mask  GOMP_DIM_MASK (GOMP_DIM_MAX))
+		 (mask  GOMP_DIM_MASK (GOMP_DIM_WORKER)))
 		  l-joining_insn
 		= nvptx_discover_pre (block, CODE_FOR_nvptx_joining);
 		l = l-parent;
@@ -3013,7 +3017,9 @@ nvptx_process_pars (parallel *par)
   inner_mask |= par-inner_mask;
 }
 
-  if (par-mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
+  if (par-mask  GOMP_DIM_MASK (GOMP_DIM_MAX))
+{ /* No propagation needed for a call.  */ }
+  else if (par-mask  GOMP_DIM_MASK (GOMP_DIM_WORKER))
 {
   nvptx_wpropagate (false, par-forked_block, par-forked_insn);
   nvptx_wpropagate (true, par-forked_block, par-fork_insn);


[Bug middle-end/67351] New: Missed optimisation on 64-bit field compared to 32-bit

2015-08-25 Thread linux at carewolf dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67351

Bug ID: 67351
   Summary: Missed optimisation on 64-bit field compared to 32-bit
   Product: gcc
   Version: 5.2.1
Status: UNCONFIRMED
  Severity: enhancement
  Priority: P3
 Component: middle-end
  Assignee: unassigned at gcc dot gnu.org
  Reporter: linux at carewolf dot com
  Target Milestone: ---

Created attachment 36253
  -- https://gcc.gnu.org/bugzilla/attachment.cgi?id=36253action=edit
Test

Gcc will expand and detect field setting on 32-bit integers, but for some
reason miss the opportunity on 64-bit.

This was discovered as gcc was inexplicably slower compared to clang on the
64-bit case but not when using 32bit.


[Bug target/67296] [HSA] ICE in register allocator (assignment of this argument in a ctor)

2015-08-25 Thread matz at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67296

Michael Matz matz at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #2 from Michael Matz matz at gcc dot gnu.org ---
Fixed more completely than for PR 67272, so the fix for that one is superseded
as well.


Re: Fix libbacktrace -fPIC breakage from Use libbacktrace in libgfortran

2015-08-25 Thread Hans-Peter Nilsson
TL;DR: See last...

 From: Ulrich Weigand uweig...@de.ibm.com
 Date: Tue, 25 Aug 2015 14:59:05 +0200

 However, the compiler actually does accept -fPIC.  If the flag is
 present, we attempt to generate relocatable code, but only to the
 extent the compiler can do that without support for run-time
 relocations.  The most significant restriction is that statically
 initializing a global variable to a pointer will not work.
 (This is useful for some special cases of self-relocating code.
 Such code normally can work around this restriction.)

Still, things like that is why I chose to emit a hard error for
-fPIC/-fpic where it's not supported for *all* code...


 Now, with the patch above, libbacktrace is still compiled with
 -fPIC on SPU, but some files do in fact contain just such global
 initializers, causing compilation to fail:
 
 gcc-head/src/libbacktrace/elf.c:241:27: error: creating run-time relocation 
 for '*.LC2'
  static const char * const debug_section_names[DEBUG_MAX] =
^
 The other GCC run-time libraries rely on libtool to figure out
 that even though -fPIC works, dynamic libraries are still not
 supported on the platform, and thus compile everything for
 static linking (i.e. without -fPIC).

That's not what I see, at least not the figuring out part.
(They mostly use libtool as-is; some test tuples, but some test
version-script support and add it then.)

 I'm wondering if we couldn't use the same libtool mechanism here:
 if the architecture does not support dynamic linking at all, no
 target library will be built as shared library, and thus there is
 no need to build libbacktrace with -fPIC either.  (My understanding
 is that we need to build libbacktrace with -fPIC because it might
 get linked into some other shared target library.)

Yes, that's what the comment in the patch context says, as
happens for libgfortran.

 The libbacktrace configure script actually incorporates all the
 libtool init code that makes this determination, and sets the
 shell variable can_build_shared to no on SPU.  Would it be
 valid to use this variable in the test whether to use -fPIC?
 (I'm not sure which of the many libtool variables are intended
 to be used outside, and which are private ...)

I momentarily pondered this too, when I found the libtool
PIC-test-code grepping libtool/configure, but I chose the
simpler TRY_COMPILE test partly for the same
maybe-internal-variable reason.  A visit to the libtool
documentation shows can_build_shared is for some reason not
listed among the documented variables and a STFW doesn't yield
more information for the first few pages of hits (just some
indexed random libtool copies).  On the other hand, libtool.m4
is in the top directory, so we know if we switch to some version
without can_build_shared.

I'll leave that to you to sort out, but if you chose to use
$can_build_shared, consider also setting PIC_FLAG to $pic_flag
(instead of plain -fPIC).  In the meantime I'll commit my patch
as it solves *some* of the breakage; for targets erroring on -fPIC.

...but reading the libtool documentation I think I found a much
better solution: Let's just add -prefer-pic when compiling
libbacktrace.  It leaves everything to libtool.  Can you please
test this?

libbacktrace:
* configure.ac: Use libtool option -prefer-pic, not -fPIC.
* configure: Regenerate.

diff -upr /expvol/pp_slask/hp/checkout/gcchead/gcc/libbacktrace/configure.ac 
libbacktrace/configure.ac
--- libbacktrace/configure.ac   2015-05-29 17:23:20.0 +0200
+++ libbacktrace/configure.ac   2015-08-24 17:31:18.0 +0200
@@ -163,10 +163,11 @@ fi

 # When building as a target library, shared libraries may want to link
 # this in.  We don't want to provide another shared library to
-# complicate dependencies.  Instead, we just compile with -fPIC.
+# complicate dependencies.  Instead, we prefer PIC, if the target
+# supports that through libtool.
 PIC_FLAG=
 if test -n ${with_target_subdir}; then
-  PIC_FLAG=-fPIC
+  PIC_FLAG=-prefer-pic
 fi
 # Similarly, use -fPIC with --enable-host-shared:
 AC_ARG_ENABLE(host-shared,

brgds, H-P


[PATCH] Update wwwdocs for --with-advance-toolchain=at

2015-08-25 Thread Michael Meissner
I installed the following patch on wwwdocs to document the
--with-advance-toolchain=at option I added in June:

2015-08-25  Michael Meissner  meiss...@linux.vnet.ibm.com

* changes.html (PowerPC options): Document new configure option
--with-advance-toolchain=at.

Index: htdocs/gcc-6/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
retrieving revision 1.21
diff -p -c -r1.21 changes.html
*** htdocs/gcc-6/changes.html   13 Aug 2015 12:32:16 -  1.21
--- htdocs/gcc-6/changes.html   25 Aug 2015 16:38:13 -
*** enum {
*** 182,187 
--- 182,195 
  !-- h3 id=nds32NDS32/h3 --
  
  !-- h3 id=powerpcPowerPC / PowerPC64 / RS6000/h3 --
+   ul
+ lipA new configuration option 
-code--with-advance-toolchain=at/code
+ was added for PowerPC 64-bit Linux systems to use the header files, 
library
+ files, and the dynamic linker from a specific Advance Toolchain release
+ instead of the default versions that are provided by the Linux
+ distribution.  In general, this option is intended for the developers of
+ GCC, and it is not intended for general use.
+   /ul
  
  !-- h3 id=rxRX/h3 --
  

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



[PATCH] rs6000: Fix PR67346

2015-08-25 Thread Segher Boessenkool
*iormode_mask is a define_insn_and_split, so it shouldn't use
can_create_pseudo in its instruction condition, because IRA can then
create such an instruction, and the condition becomes false before
the insn is split.  Use a scratch instead.

Bootstrapped and tested on powerpc64-linux; okay for trunk?


Segher


2015-08-25  Segher Boessenkool  seg...@kernel.crashing.org

PR target/67346
* config/rs6000/rs6000.md (*iormode_mask): Use a match_scratch.

---
 gcc/config/rs6000/rs6000.md | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2138184..1710b5e 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3652,9 +3652,9 @@ (define_split
 (define_insn_and_split *iormode_mask
   [(set (match_operand:GPR 0 gpc_reg_operand =r)
(ior:GPR (match_operand:GPR 1 gpc_reg_operand r)
-(match_operand:GPR 2 const_int_operand n)))]
-  can_create_pseudo_p ()
-!logical_const_operand (operands[2], MODEmode)
+(match_operand:GPR 2 const_int_operand n)))
+   (clobber (match_scratch:GPR 3 =r))]
+  !logical_const_operand (operands[2], MODEmode)
 rs6000_is_valid_mask (operands[2], NULL, NULL, MODEmode)
   #
1
@@ -3669,7 +3669,8 @@ (define_insn_and_split *iormode_mask
 {
   int nb, ne;
   rs6000_is_valid_mask (operands[2], nb, ne, MODEmode);
-  operands[3] = gen_reg_rtx (MODEmode);
+  if (GET_CODE (operands[3]) == SCRATCH)
+operands[3] = gen_reg_rtx (MODEmode);
   operands[4] = GEN_INT (ne);
   operands[5] = GEN_INT (~UINTVAL (operands[2]));
 }
-- 
1.8.1.4



[Bug middle-end/64544] ../../gcc-svn/gcc/cgraphunit.c:2183:1: internal compiler error: in check_probability, at basic-block.h:581

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64544

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |FIXED

--- Comment #3 from Marek Polacek mpolacek at gcc dot gnu.org ---
Should be fixed.  If not, please reopen and be sure to add a preprocessed
source file.


[PATCH] rs6000: Fix PR67344

2015-08-25 Thread Segher Boessenkool
The *andmode3_imm_dot_shifted pattern is a define_insn_and_split,
like most dot patterns: if its output is not assigned cr0 but some
other cr reg, it splits to a non-dot insn and a compare.

Unfortunately that non-dot insn will clobber cr0 as well.  We could
add another clobber (with =X,x), but then that second alternative
is never useful; instead, just remove that second alternative.

Bootstrapped and tested on powerpc64-linux; is this okay for trunk?


Segher


2015-08-25  Segher Boessenkool  seg...@kernel.crashing.org

PR target/67344
* config/rs6000/rs6000.md (*andmode3_imm_dot_shifted): Change to
a define_insn, remove second alternative.

---
 gcc/config/rs6000/rs6000.md | 29 -
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 527ad98..2138184 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3037,15 +3037,15 @@ (define_insn_and_split *andmode3_imm_mask_dot2
(set_attr dot yes)
(set_attr length 4,8)])
 
-(define_insn_and_split *andmode3_imm_dot_shifted
-  [(set (match_operand:CC 3 cc_reg_operand =x,?y)
+(define_insn *andmode3_imm_dot_shifted
+  [(set (match_operand:CC 3 cc_reg_operand =x)
(compare:CC
  (and:GPR
-   (lshiftrt:GPR (match_operand:GPR 1 gpc_reg_operand %r,r)
- (match_operand:SI 4 const_int_operand n,n))
-   (match_operand:GPR 2 const_int_operand n,n))
+   (lshiftrt:GPR (match_operand:GPR 1 gpc_reg_operand %r)
+ (match_operand:SI 4 const_int_operand n))
+   (match_operand:GPR 2 const_int_operand n))
  (const_int 0)))
-   (clobber (match_scratch:GPR 0 =r,r))]
+   (clobber (match_scratch:GPR 0 =r))]
   logical_const_operand (GEN_INT (UINTVAL (operands[2])
INTVAL (operands[4])),
  DImode)
@@ -3054,23 +3054,10 @@ (define_insn_and_split *andmode3_imm_dot_shifted
 rs6000_gen_cell_microcode
 {
   operands[2] = GEN_INT (UINTVAL (operands[2])  INTVAL (operands[4]));
-  if (which_alternative == 0)
-return andi%e2. %0,%1,%u2;
-  else
-return #;
+  return andi%e2. %0,%1,%u2;
 }
-   reload_completed  cc_reg_not_cr0_operand (operands[3], CCmode)
-  [(set (match_dup 0)
-   (and:GPR (lshiftrt:GPR (match_dup 1)
-  (match_dup 4))
-(match_dup 2)))
-   (set (match_dup 3)
-   (compare:CC (match_dup 0)
-   (const_int 0)))]
-  
   [(set_attr type logical)
-   (set_attr dot yes)
-   (set_attr length 4,8)])
+   (set_attr dot yes)])
 
 
 (define_insn andmode3_mask
-- 
1.8.1.4



Re: [PATCH 14/15][ARM/AArch64 Testsuite]Add test of vcvt{,_high}_{f16_f32,f32_f16}

2015-08-25 Thread Alan Lawrence

Christophe Lyon wrote:

On 28 July 2015 at 13:27, Alan Lawrence alan.lawre...@arm.com wrote:

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp:
set additional flags for neon-fp16 support.
* gcc.target/aarch64/advsimd-intrinsics/vcvt_f16.c: New.


Is that the right version of the patch?

The advsimd-intrinsics.exp part conflicts with patch 13/15.

Am I missing something?

Christophe.



Oh, sorry, thanks for pointing this out. Looks like I reposted the previous 
version, rather than what I'd been testing, which drops the conflicting hunk.


Thanks,
Alan



RE: [PATCH ppc64,aarch64,alpha 00/15] Improve backend constant generation

2015-08-25 Thread Wilco Dijkstra
 Richard Henderson wrote:
 On 08/12/2015 08:59 AM, Wilco Dijkstra wrote:
  I looked at the statistics of AArch64 immediate generation a while ago.
  The interesting thing is ~95% of calls are queries, and the same query is on
  average repeated 10 times in a row. So (a) it is not important to cache the
  expansions, and (b) the high repetition rate means a single-entry cache
  has a 90% hitrate. We already have a patch for this and could collect stats
  comparing the approaches. If a single-entry cache can provide a similar
  benefit as caching all immediates then my preference would be to keep things
  simple and just cache the last query.
 
 Interesting.  That's already more detailed investigation than I'd done.  I had
 no idea the queries were so clustered.  I assumed that the queries would be
 scattered across various passes, and so the various constants across the
 function would get checked in sequence.
 
 I would be very interested in seeing those stats when you've done.

Caching improves average buildtime by 0.1-0.2% - your patch seems to be slightly
faster than caching just 1 query, so that suggests caching a few entries would 
be
beneficial.

However looking at the immediates that are generated by the loops, it's feasible
to avoid linear/quadratic search loops altogether. So I think a generic 
immediate 
caching scheme won't be useful for AArch64.

Wilco




Re: [PATCH] rs6000: Fix PR67346

2015-08-25 Thread David Edelsohn
On Tue, Aug 25, 2015 at 1:09 PM, Segher Boessenkool
seg...@kernel.crashing.org wrote:
 *iormode_mask is a define_insn_and_split, so it shouldn't use
 can_create_pseudo in its instruction condition, because IRA can then
 create such an instruction, and the condition becomes false before
 the insn is split.  Use a scratch instead.

 Bootstrapped and tested on powerpc64-linux; okay for trunk?


 Segher


 2015-08-25  Segher Boessenkool  seg...@kernel.crashing.org

 PR target/67346
 * config/rs6000/rs6000.md (*iormode_mask): Use a match_scratch.

Okay.

Thanks, David


Re: Moving to git

2015-08-25 Thread Eric S. Raymond
Jason Merrill ja...@redhat.com:
 git-svn find-rev takes r123456 and returns a commit hash based on the
 git-svn-id in the git log; I don't see why we would need to break that
 moving forward, though I'm not sure how well it would work without reference
 to an actual SVN server.

It won't work at all, because git-svn won't.
-- 
a href=http://www.catb.org/~esr/;Eric S. Raymond/a


Re: [PATCH 3/5] Build ARRAY_REFs when the base is of ARRAY_TYPE.

2015-08-25 Thread Bin.Cheng
On Wed, Aug 26, 2015 at 3:50 AM, Jeff Law l...@redhat.com wrote:
 On 08/25/2015 05:06 AM, Alan Lawrence wrote:

 When SRA completely scalarizes an array, this patch changes the
 generated accesses from e.g.

 MEM[(int[8] *)a + 4B] = 1;

 to

 a[1] = 1;

 This overcomes a limitation in dom2, that accesses to equivalent
 chunks of e.g. MEM[(int[8] *)a] are not hashable_expr_equal_p with
 accesses to e.g. MEM[(int[8] *)a]. This is necessary for constant
 propagation in the ssa-dom-cse-2.c testcase (after the next patch
 that makes SRA handle constant-pool loads).

 I tried to work around this by making dom2's hashable_expr_equal_p
 less conservative, but found that on platforms without AArch64's
 vectorized reductions (specifically Alpha, hppa, PowerPC, and SPARC,
 mentioned in ssa-dom-cse-2.c), I also needed to make MEM[(int[8]
 *)a] equivalent to a[0], etc.; a complete overhaul of
 hashable_expr_equal_p seems like a larger task than this patch
 series.

 I can't see how to write a testcase for this in C though as direct
 assignment to an array is not possible; such assignments occur only
 with constant pool data, which is dealt with in the next patch.

 It's a general issue that if there's > 1 common way to represent an
 expression, then DOM will often miss discovery of the CSE opportunity
 because of the way it hashes expressions.

 Ideally we'd be moving to a canonical form, but I also realize that in
 the case of memory references like this, that may not be feasible.
IIRC, there were talks about lowering all memory reference on GIMPLE?
Which is the reverse approach.  Since SRA is in quite early
compilation stage, don't know if lowered memory reference has impact
on other optimizers.

Thanks,
bin

 It does make me wonder how many CSEs we're really missing due to the two
 ways to represent array accesses.


 Bootstrap + check-gcc on x86-none-linux-gnu,
 arm-none-linux-gnueabihf, aarch64-none-linux-gnu.

 gcc/ChangeLog:

 * tree-sra.c (completely_scalarize): Move some code into:
 (get_elem_size): New. (build_ref_for_offset): Build ARRAY_REF if base
 is aligned array. --- gcc/tree-sra.c | 110
 - 1 file
 changed, 69 insertions(+), 41 deletions(-)

 diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 08fa8dc..af35fcc
 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -957,6 +957,20 @@
 scalarizable_type_p (tree type) } }

 +static bool +get_elem_size (const_tree type, unsigned HOST_WIDE_INT
 *sz_out)

 Function comment needed.

 I may have missed it in the earlier patches, but can you please make
 sure any new functions you created have comments in those as well.  Such
 patches are pre-approved.

 With the added function comment, this patch is fine.

 jeff




Re: Moving to git

2015-08-25 Thread Jason Merrill

On 08/24/2015 11:49 AM, Jeff Law wrote:

On 08/24/2015 09:43 AM, Jakub Jelinek wrote:

Not to mention we should keep the existing r123456 comments in bugzilla
working, and I'm not convinced keeping a SVN version of the repository
(frozen) for that purpose is the best idea.

I'd like to keep the old ones working, but new references should
probably be using the hash id and commit name.

As for how to best keep the old r123456 links working, I don't know.
Presumably those could be mapped behind the scenes to a git id.


git-svn find-rev takes r123456 and returns a commit hash based on the 
git-svn-id in the git log; I don't see why we would need to break that 
moving forward, though I'm not sure how well it would work without 
reference to an actual SVN server.


Jason



Re: Moving to git

2015-08-25 Thread Jason Merrill

On 08/24/2015 11:54 AM, Richard Earnshaw wrote:

Why not use the output of 'git show -s --format=%ct-%h'?

$ git show -s --format=%ct-%h master
1440153969-f57da59

That gives you a unix timestamp for the commit, followed by the hash.
Now you've got a fully ordered way of referring to the commit, but still
have access to the hash code.


You don't even need to worry about the hash code, you can use the 
timestamp by itself.  Given the timestamp,


  git log -1 --until 1440153969

will show you the relevant commit, or

  git rev-list HEAD --max-count=1 --until 1440153969

will give you the hash.

So that seems like a suitable monotonically increasing identifier.  What 
do you think, Jakub?


Jason



[Bug tree-optimization/67349] [5 regression] ICE on optimization

2015-08-25 Thread trippels at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67349

Markus Trippelsdorf trippels at gcc dot gnu.org changed:

   What|Removed |Added

 Target|h8300   |
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2015-08-25
 CC||trippels at gcc dot gnu.org
  Component|target  |tree-optimization
Summary|ICE on optimization |[5 regression] ICE on
   ||optimization
 Ever confirmed|0   |1

--- Comment #2 from Markus Trippelsdorf trippels at gcc dot gnu.org ---
Trunk and gcc-4.9 are fine.

trippels@gcc2-power8 ~ % cat syn.i
struct list_head
{
  struct list_head *prev;
};
extern void __wait_rcu_gp (void*);
const int a = sizeof 0;
static inline __attribute__ ((always_inline no_instrument_function)) void
rcu_barrier_sched (void)
{
  struct list_head b[a];
  __wait_rcu_gp (b);
}

static inline __attribute__ ((always_inline no_instrument_function)) void
rcu_barrier (void)
{
  rcu_barrier_sched ();
}

struct
{
  void *wait;
} c[] = { rcu_barrier, rcu_barrier_sched };

trippels@gcc2-power8 ~ % /home/trippels/gcc_5/usr/local/bin/gcc -c -O2 syn.i
syn.i: In function ‘rcu_barrier_sched’:
syn.i:8:1: internal compiler error: Segmentation fault
 rcu_barrier_sched (void)
 ^
0x107e9053 crash_signal
../../gcc/gcc/toplev.c:383
0x10937a64 tree_check
../../gcc/gcc/tree.h:2850
0x10937a64 fold_builtin_alloca_with_align
../../gcc/gcc/tree-ssa-ccp.c:2067
0x10937a64 ccp_fold_stmt
../../gcc/gcc/tree-ssa-ccp.c:2172
0x109d85a3
substitute_and_fold_dom_walker::before_dom_children(basic_block_def*)
../../gcc/gcc/tree-ssa-propagate.c:1177
0x10dd228b dom_walker::walk(basic_block_def*)
../../gcc/gcc/domwalk.c:188
0x109d7993 substitute_and_fold(tree_node* (*)(tree_node*), bool
(*)(gimple_stmt_iterator*), bool)
../../gcc/gcc/tree-ssa-propagate.c:1272
0x1092f0eb ccp_finalize
../../gcc/gcc/tree-ssa-ccp.c:941
0x1092f0eb do_ssa_ccp
../../gcc/gcc/tree-ssa-ccp.c:2382
0x1092f0eb execute
../../gcc/gcc/tree-ssa-ccp.c:2414
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.

[Bug tree-optimization/67055] [5/6 Regression] Segmentation fault in fold_builtin_alloca_with_align in tree-ssa-ccp.c

2015-08-25 Thread trippels at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055

Markus Trippelsdorf trippels at gcc dot gnu.org changed:

   What|Removed |Added

 CC||ysato at users dot 
sourceforge.jp

--- Comment #12 from Markus Trippelsdorf trippels at gcc dot gnu.org ---
*** Bug 67349 has been marked as a duplicate of this bug. ***


[Bug tree-optimization/67349] [5 regression] ICE on optimization

2015-08-25 Thread ysato at users dot sourceforge.jp
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67349

--- Comment #3 from Yoshinori Sato ysato at users dot sourceforge.jp ---
I tested
gcc version 6.0.0 20150710 (experimental) (GCC)

I'll try the latest trunk.

Thanks,


[Bug inline-asm/67317] [x86] Silly code generation for _addcarry_u32/_addcarry_u64

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67317

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |WAITING
   Last reconfirmed||2015-08-25
 Ever confirmed|0   |1


[Bug middle-end/67340] [6 Regression] ICE: in convert_move, at expr.c:279

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67340

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Target||hppa*-*-*
 CC||aoliva at gcc dot gnu.org
   Target Milestone|--- |6.0


[PATCH, ARM] List Cs and US constraints as being used

2015-08-25 Thread Thomas Preud'homme
Hi,

The header in gcc/config/arm/constraints.md lists all the ARM-specific 
constraints defined and the targets they apply to, but misses a couple of them. 
This patch adds the missing Cs and US constraints to the list.

Patch was tested by verifying that arm-none-eabi-gcc cross-compiler can still 
be built (i.e. the comment remains a comment).

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 42935a4..2d9ffb8 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -21,7 +21,7 @@
 ;; The following register constraints have been used:
 ;; - in ARM/Thumb-2 state: t, w, x, y, z
 ;; - in Thumb state: h, b
-;; - in both states: l, c, k, q, US
+;; - in both states: l, c, k, q, Cs, Ts, US
 ;; In ARM state, 'l' is an alias for 'r'
 ;; 'f' and 'v' were previously used for FPA and MAVERICK registers.


Committed as obvious with the following ChangeLog entry:

2015-08-25  Thomas Preud'homme  thomas.preudho...@arm.com

* config/arm/constraints.md: Also list Cs and US ARM-specific
constraints as used.

Best regards,

Thomas




[Bug bootstrap/66038] [5 regression] (stage 2) build/genmatch issue (gcc/hash-table.h|c) with --disable-checking [ introduced by r218976 ]

2015-08-25 Thread kumba at gentoo dot org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66038

--- Comment #27 from Joshua Kinard kumba at gentoo dot org ---
(In reply to Richard Biener from comment #26)
 Don't hold your breath.  Basically somebody who can reproduce it has to find
 the root-cause and a fix.

4.9.3 works, and the problem appears specific to genmatch with the '--gimple'
argument.  I guess I can test to see if 5.0.0 is also affected, and then start
diffing the genmatch.c files between working/non-working version to trace the
problem down.  That will be quicker than git bisecting on these machines (old
SGI machines).  Can't stay stuck on 4.9.x forever...


[Bug middle-end/67005] [5/6 Regression] ICE: in verify_loop_structure, at cfgloop.c:1647 (loop with header n not in loop tree)

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67005

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 CC||mpolacek at gcc dot gnu.org

--- Comment #2 from Marek Polacek mpolacek at gcc dot gnu.org ---
Still ICEs; probably it's just about adding loops_state_set (LOOPS_NEED_FIXUP);
somewhere into tree-ssa-dce.c?


[Bug tree-optimization/67349] [5 regression] ICE on optimization

2015-08-25 Thread trippels at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67349

Markus Trippelsdorf trippels at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #4 from Markus Trippelsdorf trippels at gcc dot gnu.org ---
dup.

*** This bug has been marked as a duplicate of bug 67055 ***


[Bug tree-optimization/67055] [5 Regression] Segmentation fault in fold_builtin_alloca_with_align in tree-ssa-ccp.c

2015-08-25 Thread trippels at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055

Markus Trippelsdorf trippels at gcc dot gnu.org changed:

   What|Removed |Added

Summary|[5/6 Regression]|[5 Regression] Segmentation
   |Segmentation fault in   |fault in
   |fold_builtin_alloca_with_al |fold_builtin_alloca_with_al
   |ign in tree-ssa-ccp.c   |ign in tree-ssa-ccp.c

--- Comment #13 from Markus Trippelsdorf trippels at gcc dot gnu.org ---
Another testcase:

trippels@gcc2-power8 ~ % cat syn.i
struct list_head
{
  struct list_head *prev;
};
extern void __wait_rcu_gp (void*);
const int a = sizeof 0;
static inline __attribute__ ((always_inline no_instrument_function)) void
rcu_barrier_sched (void)
{
  struct list_head b[a];
  __wait_rcu_gp (b);
}

static inline __attribute__ ((always_inline no_instrument_function)) void
rcu_barrier (void)
{
  rcu_barrier_sched ();
}

struct
{
  void *wait;
} c[] = { rcu_barrier, rcu_barrier_sched };

trippels@gcc2-power8 ~ % /home/trippels/gcc_5/usr/local/bin/gcc -c -O2 syn.i
syn.i: In function ‘rcu_barrier_sched’:
syn.i:8:1: internal compiler error: Segmentation fault
 rcu_barrier_sched (void)
 ^
0x107e9053 crash_signal
../../gcc/gcc/toplev.c:383
0x10937a64 tree_check
../../gcc/gcc/tree.h:2850
0x10937a64 fold_builtin_alloca_with_align
../../gcc/gcc/tree-ssa-ccp.c:2067
0x10937a64 ccp_fold_stmt
../../gcc/gcc/tree-ssa-ccp.c:2172
0x109d85a3
substitute_and_fold_dom_walker::before_dom_children(basic_block_def*)
../../gcc/gcc/tree-ssa-propagate.c:1177
0x10dd228b dom_walker::walk(basic_block_def*)
../../gcc/gcc/domwalk.c:188
0x109d7993 substitute_and_fold(tree_node* (*)(tree_node*), bool
(*)(gimple_stmt_iterator*), bool)
../../gcc/gcc/tree-ssa-propagate.c:1272
0x1092f0eb ccp_finalize
../../gcc/gcc/tree-ssa-ccp.c:941
0x1092f0eb do_ssa_ccp
../../gcc/gcc/tree-ssa-ccp.c:2382
0x1092f0eb execute
../../gcc/gcc/tree-ssa-ccp.c:2414
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.

[Bug bootstrap/66038] [5 regression] (stage 2) build/genmatch issue (gcc/hash-table.h|c) with --disable-checking [ introduced by r218976 ]

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66038

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Target|powerpc-darwin  |powerpc-darwin,
   ||mips64-linux-n32
 Status|WAITING |NEW

--- Comment #26 from Richard Biener rguenth at gcc dot gnu.org ---
Don't hold your breath.  Basically somebody who can reproduce it has to find
the root-cause and a fix.


[testsuite] Clean up effective_target cache

2015-08-25 Thread Christophe Lyon
Hi,

Some subsets of the tests override ALWAYS_CXXFLAGS or
TEST_ALWAYS_FLAGS and perform effective_target support tests using
these modified flags.

In case these flags conflict with the effective_target tests, it means
that subsequent tests will be UNSUPPORTED even though
ALWAYS_CXXFLAGS/TEST_ALWAYS_FLAGS have been reset and no longer
conflict.

In practice, we noticed this when running validation under 'ulimit -v
XXX', which can conflict with ASAN. We observed that sse2 and
stack_protector tests would randomly fail when tested from asan.exp,
making non-asan tests UNSUPPORTED.

This patch adds a new function 'clear_effective_target_cache', which
is called at the end of every .exp file which overrides
ALWAYS_CXXFLAGS or TEST_ALWAYS_FLAGS.

I tested it works well for asan.exp on x86_64 but the changes in other
.exp files seem mechanical.

However, I noticed that lib/g++.exp changes ALWAYS_CXXFLAGS, but does
not appear to restore it. In doubt, I didn't change it.

OK?

Christophe.
2015-08-25  Christophe Lyon  christophe.l...@linaro.org

	* lib/target-supports.exp (clear_effective_target_cache): New.
	(check_cached_effective_target): Update et_prop_list.
	* lib/asan-dg.exp (asan_finish): Call clear_effective_target_cache.
	* g++.dg/compat/compat.exp: Likewise.
	* g++.dg/compat/struct-layout-1.exp: Likewise.
	* lib/asan-dg.exp: Likewise.
	* lib/atomic-dg.exp: Likewise.
	* lib/cilk-plus-dg.exp: Likewise.
	* lib/clearcap.exp: Likewise.
	* lib/mpx-dg.exp: Likewise.
	* lib/target-supports.exp: Likewise.
	* lib/tsan-dg.exp: Likewise.
	* lib/ubsan-dg.exp: Likewise.

diff --git a/gcc/testsuite/g++.dg/compat/compat.exp b/gcc/testsuite/g++.dg/compat/compat.exp
index 1272289..4c4b25f 100644
--- a/gcc/testsuite/g++.dg/compat/compat.exp
+++ b/gcc/testsuite/g++.dg/compat/compat.exp
@@ -78,6 +78,7 @@ proc compat-use-tst-compiler { } {
 	set ALWAYS_CXXFLAGS $save_always_cxxflags
 	set ld_library_path $save_ld_library_path
 	set_ld_library_path_env_vars
+	clear_effective_target_cache
 }
 }
 
diff --git a/gcc/testsuite/g++.dg/compat/struct-layout-1.exp b/gcc/testsuite/g++.dg/compat/struct-layout-1.exp
index d98..097a731 100644
--- a/gcc/testsuite/g++.dg/compat/struct-layout-1.exp
+++ b/gcc/testsuite/g++.dg/compat/struct-layout-1.exp
@@ -61,6 +61,7 @@ proc compat-use-alt-compiler { } {
 	set ld_library_path $alt_ld_library_path
 	set_ld_library_path_env_vars
 	restore_gcc_exec_prefix_env_var
+	clear_effective_target_cache
 }
 }
 
diff --git a/gcc/testsuite/lib/asan-dg.exp b/gcc/testsuite/lib/asan-dg.exp
index 141a479..3ce264e 100644
--- a/gcc/testsuite/lib/asan-dg.exp
+++ b/gcc/testsuite/lib/asan-dg.exp
@@ -138,6 +138,7 @@ proc asan_finish { args } {
 }
 set ld_library_path $asan_saved_library_path
 set_ld_library_path_env_vars
+clear_effective_target_cache
 }
 
 # Symbolize lines like
diff --git a/gcc/testsuite/lib/atomic-dg.exp b/gcc/testsuite/lib/atomic-dg.exp
index d9df227..fe24127 100644
--- a/gcc/testsuite/lib/atomic-dg.exp
+++ b/gcc/testsuite/lib/atomic-dg.exp
@@ -101,4 +101,5 @@ proc atomic_finish { args } {
 } else {
 	unset TEST_ALWAYS_FLAGS
 }
+clear_effective_target_cache
 }
diff --git a/gcc/testsuite/lib/cilk-plus-dg.exp b/gcc/testsuite/lib/cilk-plus-dg.exp
index 38e5400..7f38f37 100644
--- a/gcc/testsuite/lib/cilk-plus-dg.exp
+++ b/gcc/testsuite/lib/cilk-plus-dg.exp
@@ -101,4 +101,5 @@ proc cilkplus_finish { args } {
 } else {
 	unset TEST_ALWAYS_FLAGS
 }
+clear_effective_target_cache
 }
diff --git a/gcc/testsuite/lib/clearcap.exp b/gcc/testsuite/lib/clearcap.exp
index d41aa1e..3e2a88c 100644
--- a/gcc/testsuite/lib/clearcap.exp
+++ b/gcc/testsuite/lib/clearcap.exp
@@ -55,4 +55,5 @@ proc clearcap-finish { args } {
 } else {
 	unset TEST_ALWAYS_FLAGS
 }
+clear_effective_target_cache
 }
diff --git a/gcc/testsuite/lib/mpx-dg.exp b/gcc/testsuite/lib/mpx-dg.exp
index c8f64cd..b2bd40c 100644
--- a/gcc/testsuite/lib/mpx-dg.exp
+++ b/gcc/testsuite/lib/mpx-dg.exp
@@ -142,4 +142,5 @@ proc mpx_finish { args } {
 }
 set ld_library_path $mpx_saved_library_path
 set_ld_library_path_env_vars
+clear_effective_target_cache
 }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 1988301..e2084bb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -117,6 +117,7 @@ proc current_target_name { } {
 
 proc check_cached_effective_target { prop args } {
 global et_cache
+global et_prop_list
 
 set target [current_target_name]
 if {![info exists et_cache($prop,target)]
@@ -124,12 +125,30 @@ proc check_cached_effective_target { prop args } {
 	verbose check_cached_effective_target $prop: checking $target 2
 	set et_cache($prop,target) $target
 	set et_cache($prop,value) [uplevel eval $args]
+	lappend et_prop_list $prop
+	verbose check_cached_effective_target cached list is now: $et_prop_list 2
 }
 set value $et_cache($prop,value)
 verbose 

[Bug c++/67313] [6 Regression] ICE: in vague_linkage_p, at cp/decl2.c:1878 with -fno-weak and variadic template

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67313

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

   Target Milestone|--- |6.0


[Bug c++/67315] [4.9 Regression] Strange 'this' pointer behavior when calling virtual function with different optimization attributes.

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67315

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

   Target Milestone|--- |4.9.4


[Bug c++/67315] [4.9 Regression] Strange 'this' pointer behavior when calling virtual function with different optimization attributes.

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67315

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 CC||hubicka at gcc dot gnu.org

--- Comment #2 from Richard Biener rguenth at gcc dot gnu.org ---
I think this is a dup of the bug with the i386 backend issue of local calling
conventions and its interaction with optimization attributes (optimize
setting).
Somebody find it ... ;)


[Bug tree-optimization/67326] [5/6 Regression] -ftree-loop-if-convert-stores does not vectorize conditional assignment (anymore)

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67326

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
   Keywords||missed-optimization
   Last reconfirmed||2015-08-25
 CC||rguenth at gcc dot gnu.org,
   ||venkataramanan.kumar at amd dot com
 Ever confirmed|0   |1
Summary|[5.2/6.0 regression]|[5/6 Regression]
   |-ftree-loop-if-convert-stor |-ftree-loop-if-convert-stor
   |es does not vectorize   |es does not vectorize
   |conditional assignment  |conditional assignment
   |(anymore)   |(anymore)
   Target Milestone|--- |6.0
   Severity|normal  |enhancement

--- Comment #1 from Richard Biener rguenth at gcc dot gnu.org ---
This is because in condAssign1 v3 is not accessed always and thus we do not
know (ok, stupid ifcvt limitation) that v3[i] is not accessed out-of-bounds.
Previous to

2015-07-10  Richard Biener  rguent...@suse.de

PR tree-optimization/66823
* tree-if-conv.c (memrefs_read_or_written_unconditionally): Fix
inverted predicate.

ifcvt's reasoning was "oh, v3[i] is _not_ equal to v2[i] which is always
accessed, thus it's fine to access it always as well".  I fixed this bug
but did not try to enhance ifcvt's idea of what operations can trap
(v3[i] is thought to eventually trap because we do not try to analyze
what values 'i' can have).

So in 4.9 and earlier this only works because of the above bug.  So, kind
of confirmed, but it's really an enhancement request.  AFAIR Venkat is
working in this area.


[Bug c++/67315] [4.9 Regression] Strange 'this' pointer behavior when calling virtual function with different optimization attributes.

2015-08-25 Thread redi at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67315

--- Comment #3 from Jonathan Wakely redi at gcc dot gnu.org ---
PR 54068 ?


Re: [AArch64] [TLSIE][1/2] Rename test source file for reuse

2015-08-25 Thread Marcus Shawcroft
On 19 June 2015 at 10:15, Jiong Wang jiong.w...@arm.com wrote:

 Rename test source from tlsle.c into tls.c for reuse purpose.

 tls.c will be used as test source file for all TLS test, we just need to
 specify different tls options in different testcases.

 2015-06-19  Jiong Wang  jiong.w...@arm.com

 gcc/testsuite/
   * gcc.target/aarch64/tlsle.c: Rename to tls.c
   * gcc.target/aarch64/aarch64/tlsle12.c: Update source file name.
   * gcc.target/aarch64/aarch64/tlsle24.c: Ditto.
   * gcc.target/aarch64/aarch64/tlsle32.c: Ditto.

OK Thanks /Marcus


[Bug tree-optimization/67312] [6 Regression] ICE: SIGSEGV in expand_expr_real_1 (expr.c:9561) with -ftree-coalesce-vars

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67312

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

   Target Milestone|--- |6.0

--- Comment #1 from Richard Biener rguenth at gcc dot gnu.org ---
We shouldn't do coalesce-vars at -O0 I think.


[Bug middle-end/67118] gcc and gfortran started crashing recently

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67118

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |WAITING
   Last reconfirmed||2015-08-25
 CC||mpolacek at gcc dot gnu.org
 Ever confirmed|0   |1
   Severity|blocker |normal


[Bug middle-end/67298] [6 Regression] 254.gap in SPEC CPU 2000 is miscompiled

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67298

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |WAITING
   Last reconfirmed||2015-08-25
 CC||mpolacek at gcc dot gnu.org
 Ever confirmed|0   |1

--- Comment #5 from Marek Polacek mpolacek at gcc dot gnu.org ---
Probably INVALID then...


[Bug middle-end/66984] ICE: fold_binary changes type of operand, causing failure in verify_gimple_assign_binary

2015-08-25 Thread mpolacek at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66984

Marek Polacek mpolacek at gcc dot gnu.org changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 CC||mpolacek at gcc dot gnu.org
 Resolution|--- |FIXED

--- Comment #8 from Marek Polacek mpolacek at gcc dot gnu.org ---
Assuming fixed then.


[Bug target/67349] ICE on optimization

2015-08-25 Thread miyuki at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67349

Mikhail Maltsev miyuki at gcc dot gnu.org changed:

   What|Removed |Added

 CC||miyuki at gcc dot gnu.org

--- Comment #1 from Mikhail Maltsev miyuki at gcc dot gnu.org ---
I cannot reproduce this with current trunk (h8300 cross), but earlier revisions
(including 5.2 release) do crash even on x86_64. I suppose this is the same
bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055#c9

Please provide the output of h8300-unknown-linux-gcc -v.


[Bug middle-end/67341] [ICE] libgo build failure: in mark_stmt_if_obviously_necessary, at tree-ssa-dce.c:278

2015-08-25 Thread miyuki at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67341

Mikhail Maltsev miyuki at gcc dot gnu.org changed:

   What|Removed |Added

 CC||miyuki at gcc dot gnu.org

--- Comment #1 from Mikhail Maltsev miyuki at gcc dot gnu.org ---
Probably a dup of PR67284. At least for me bootstrap passes even with -O3 (with
--enable-checking=yes, though) with r227145.


[Bug tree-optimization/37021] Fortran Complex reduction / multiplication not vectorized

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37021

--- Comment #21 from Richard Biener rguenth at gcc dot gnu.org ---
(In reply to Bill Schmidt from comment #20)
 We still don't vectorize the original code example on Power.  It appears
 that this is being disabled because of an alignment issue.  The data
 references are being rejected by:
 
 product.f:9:0: note: can't force alignment of ref: REALPART_EXPR
 *a.0_24[_50]
 
 and similar for the other three DRs.  This happens due to this code in
 vect_compute_data_ref_alignment:
 
   if (base_alignment < TYPE_ALIGN (vectype))
 {
   /* Strip an inner MEM_REF to a bare decl if possible.  */
   if (TREE_CODE (base) == MEM_REF
       && integer_zerop (TREE_OPERAND (base, 1))
       && TREE_CODE (TREE_OPERAND (base, 0)) == ADDR_EXPR)
 base = TREE_OPERAND (TREE_OPERAND (base, 0), 0);
 
   if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
 {
   if (dump_enabled_p ())
 {
   dump_printf_loc (MSG_NOTE, vect_location,
                    "can't force alignment of ref: ");
   dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
   dump_printf (MSG_NOTE, "\n");
 }
   return true;
 }
 
 Here TYPE_ALIGN (vectype) is 128 (Power vectors are normally aligned on a
 128-bit value), and base_alignment is 64.  a.0 is defined as:
 
 complex(kind=8) [0:D.1831] * restrict a.0;
 
 In both ELFv1 and ELFv2 ABIs for Power, a complex type is defined to have
 the same alignment as the underlying type.  So complex double has 8-byte
 alignment.
 
 On earlier versions of Power, the decision is fine, because unaligned
 accesses are expensive prior to POWER8.  With POWER8, though, an unaligned
 access will (most of the time) perform as well as an aligned access.  So
 ideally we would like to teach the vectorizer to allow vectorization here.
 
 It seems like vect_supportable_dr_alignment ought to be considered as part
 of the SLP vectorization decision here, rather than just comparing the base
 alignment with the vector type alignment.  Adding a check for that allows
 things to get a little further, but we still don't vectorize the block.  (I
 haven't yet looked into why, but I assume more needs to be done downstream
 to handle this case.)
 
 My understanding of the vectorizer is not yet very deep, so before going too
 far down the wrong path, I'd like your opinion on the best approach to
 fixing the problem.  Thanks!

I see it only failing due to cost issues (tried ppc64le and -mcpu=power8).
The unaligned loads cost 3 and we end up with

t.f90:8:0: note: Cost model analysis:
  Vector inside of loop cost: 40
  Vector prologue cost: 8
  Vector epilogue cost: 4
  Scalar iteration cost: 12
  Scalar outside cost: 6
  Vector outside cost: 12
  prologue iterations: 0
  epilogue iterations: 0
t.f90:8:0: note: cost model: the vector iteration cost = 40 divided by the
scalar iteration cost = 12 is greater or equal to the vectorization factor = 1.

Note that we are (still) not very good in estimating the SLP cost as we
account 4 vector loads here (because we essentially will end up with
4 different permutations used), so the unaligned part is accounted for
too much and likely the permutation cost as well.  Both are a limitation
of the SLP data structures and not easily fixable.  With
-fvect-cost-model=unlimited I see both loops vectorized.

 Bill


[Bug tree-optimization/67328] range test rather than single bit test for code testing enum values

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67328

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |WAITING
   Last reconfirmed||2015-08-25
 Ever confirmed|0   |1

--- Comment #2 from Richard Biener rguenth at gcc dot gnu.org ---
Note this is already fold-const.c:optimize_bit_field_compare at work.  With
-DALT (non-working code) we get

;; Function test_pic (null)
;; enabled by -tree-original


{
  if ((BIT_FIELD_REF *info, 8, 0  3) + 254 = 1)

and

;; Function test_exe (null)
;; enabled by -tree-original


{
  if ((SAVE_EXPR BIT_FIELD_REF *info, 8, 0  3) == 0 || (SAVE_EXPR
BIT_FIELD_REF *info, 8, 0  3) == 2)

from it.  Without -DALT

;; Function test_pic (null)
;; enabled by -tree-original


{
  if ((SAVE_EXPR BIT_FIELD_REF *info, 8, 0  3) == 3 || (SAVE_EXPR
BIT_FIELD_REF *info, 8, 0  3) == 1)

;; Function test_exe (null)
;; enabled by -tree-original


{
  if ((BIT_FIELD_REF *info, 8, 0  3) = 1)


I see more than a single bit test for both cases btw, mostly because we
need to mask the padding.  Not sure what optimal code you expect here.


[Bug c++/67318] [6 regression] Parsing error when using abbreviated integral type names in template parameter pack declaration

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67318

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

   Target Milestone|--- |6.0


[Bug debug/67293] Very large DW_AT_const_value produced

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67293

--- Comment #3 from Richard Biener rguenth at gcc dot gnu.org ---
(In reply to Jakub Jelinek from comment #2)
 I'm fine with introducing some limit on the size of const values, with a
 param.
 As for the other question, I think you've answered that yourself,
 if the const ends up in the source, then that is supposedly because
 something needed its address.  At that point it is much better to be able to
 print that address in the debugger.
 If you want to stream DW_AT_const_value early and actually rewrite the DWARF
 during LTO later, rather than just reference unmodified DIEs from other
 DIEs, then you could if you end up having an address actually remove the
 DW_AT_const_value and replace it with DW_AT_location if possible.

A DWARF optimizer could do this indeed.  With the current laid out scheme
for LTO debug we will add a DW_AT_location late if the object is instantiated
thus the debugger will see both.  But I will have to pro-actively add
the DW_AT_const_value early in case the object doesn't get emitted - but
I definitely want to limit the size of the eventually pointless DWARF.

I can do the re-writing (remove DW_AT_const_value if we have a location)
for the non-LTO path though (and also try adding a DW_AT_const_value with
a larger size-cut-off if we don't).


[Bug target/67349] New: ICE on optimization

2015-08-25 Thread ysato at users dot sourceforge.jp
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67349

Bug ID: 67349
   Summary: ICE on optimization
   Product: gcc
   Version: 6.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: ysato at users dot sourceforge.jp
  Target Milestone: ---

Created attachment 36252
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=36252&action=edit
problem source

I got following error.

$ LANG=C h8300-unknown-linux-gcc -c -O2 -fconserve-stack sync.c
In file included from include/linux/rcupdate.h:429:0,
 from include/linux/rcusync.h:5,
 from kernel/rcu/sync.c:1:
include/linux/rcutiny.h: In function 'rcu_barrier_sched':
include/linux/rcutiny.h:55:91: internal compiler error: Segmentation fault
0xb11f6f crash_signal
../../gcc/toplev.c:352
0xc1fe56 tree_check
../../gcc/tree.h:2857
0xc1fe56 fold_builtin_alloca_with_align
../../gcc/tree-ssa-ccp.c:2110
0xc1fe56 ccp_fold_stmt
../../gcc/tree-ssa-ccp.c:2215
0xca6cba substitute_and_fold_dom_walker::before_dom_children(basic_block_def*)
../../gcc/tree-ssa-propagate.c:1226
0xe6a7f7 dom_walker::walk(basic_block_def*)
../../gcc/domwalk.c:177
0xca64a9 substitute_and_fold(tree_node* (*)(tree_node*), bool
(*)(gimple_stmt_iterator*), bool)
../../gcc/tree-ssa-propagate.c:1319
0xc17efc ccp_finalize
../../gcc/tree-ssa-ccp.c:951
0xc17efc do_ssa_ccp
../../gcc/tree-ssa-ccp.c:2410
0xc17efc execute
../../gcc/tree-ssa-ccp.c:2442
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.

Without -fconserve-stack there is no problem.


Re: Moving to git

2015-08-25 Thread Andreas Schwab
Jakub Jelinek ja...@redhat.com writes:

 On Mon, Aug 24, 2015 at 10:22:22AM +0200, Andreas Schwab wrote:
 Jakub Jelinek ja...@redhat.com writes:
 
  And for those really identifying them by sha1 hashes is significantly
  worse than using monotonically increasing small number, sha1 hashes
  are impossible to remember, and you don't know what is earlier and
  what is later from just looking at it.
 
 git describe gives you such a number (relative to a tag).

 But it is not unique across different branches,

It can't be, due to the distributed nature of git.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
And now for something completely different.


Re: [PATCH 1/5] Refactor completely_scalarize_var

2015-08-25 Thread Jeff Law

On 08/25/2015 05:06 AM, Alan Lawrence wrote:

This is a small refactoring/renaming patch, it just moves the call to
completely_scalarize_record out from completely_scalarize_var, and renames
the latter to create_total_scalarization_access.

This is because the next patch needs to drop the _record suffix and I felt
it would be confusing to have both completely_scalarize and
completely_scalarize_var. However, it also makes the new function name
(create_total_scalarization_access) consistent with the existing code & comment.

Bootstrapped + check-gcc on x86_64.

gcc/ChangeLog:

* tree-sra.c (completely_scalarize_var): Rename to...
(create_total_scalarization_access): ... Here. Drop call to
completely_scalarize_record.

(analyze_all_variable_accesses): Replace completely_scalarize_var
with create_total_scalarization_access and completely_scalarize_record.

OK.
Jeff



Re: Indirect jumps

2015-08-25 Thread Jeff Law

On 08/25/2015 08:11 AM, Nathan Sidwell wrote:

Ptx is one of those rare (unique?) machines that doesn't  have an
indirect branch.  optabs  is prepared for such  a target and emits a
sorry when an indirect branch is needed.  However it then goes on to try
and  emit such an instruction and ends up ICEing.

Fixed thusly, ok?  (Or is the right solution to define a dummy indirect
branch in the PTX  md file?)

I think we're trying to generally get away from dummy patterns.

We could emulate by creating a new stack frame and shoving the target of 
the branch into the stack, then executing a return.  However, I don't 
think that's worth doing ;-)


I think the patch is fine for the trunk.

jeff


[gomp4] add reduction lock initializer

2015-08-25 Thread Nathan Sidwell
Cesar discovered another quirk of PTX.  In spite of PTX documenting that static 
variables can be initialized and default to zero, there's a little note that it 
doesn't work for .shared variables.  Thus we need code to initialize the worker 
lock variable used for reductions.


This implements a new internal function 'IFN_GOACC_LOCK_INIT', with the same 
arguments as the LOCK and UNLOCK functions.  The intent is that it is emitted at 
the reduction setup point and expands to target-specific code.


For PTX it's deleted for everything but worker level, and for that we expand to 
an initialization of the lock variable.  We can simply use the same insn as the 
unlocker, but I renamed it to be less confusing.


nathan
2015-08-25  Nathan Sidwell  nat...@codesourcery.com

	* targhooks.h (default_goacc_lock_unlock): Rename to ...
	(default_goacc_lock): ... here.  Adjust.
	* config/nvptx/nvptx.md (oacc_expand_lock, oacc_expand_unlock):
	Adjust call to lock expander.
	(oacc_expand_lock_init): New.
	(nvptx_spinlock, nvptx_spinunlock): Rename to ...
	(nvptx_spin_lock, nvptx_spin_reset): ... here.
	* config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): Rename to ...
	(nvptx_expand_oacc_lock): ... here.  Deal with init too.
	(nvptx_xform_lock_unlock): Rename to ...
	(nvptx_xform_lock): ... here.  Deal with init too.
	(TARGET_GOACC_LOCK_UNLOCK): Replace with ...
	(TARGET_GOACC_LOCK): ... this.
	* omp-low.c (execute_oacc_transform): Deal with
	IFN_GOACC_LOCK_INIT.
	(default_goacc_lock_unlock): Rename to ...
	(default_goacc_lock): ... here.  Deal with init too.
	* internal-fn.c (expand_GOACC_LOCK_INIT): New.
	* internal-fn.def (GOACC_LOCK_INIT): New.
	* doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Replace with ...
	(TARGET_GOACC_LOCK): ... this.
	* doc/tm.texi: Rebuilt.
	* target.def (goacc lock_unlock): Replace with ...
	(goacc lock): ... this.  Deal with init too.

Index: gcc/targhooks.h
===
--- gcc/targhooks.h	(revision 227174)
+++ gcc/targhooks.h	(working copy)
@@ -110,7 +110,7 @@ extern void default_destroy_cost_data (v
 extern bool default_goacc_validate_dims (tree, int [], int);
 extern unsigned default_goacc_dim_limit (unsigned);
 extern bool default_goacc_fork_join (gimple, const int [], bool);
-extern bool default_goacc_lock_unlock (gimple, const int [], bool);
+extern bool default_goacc_lock (gimple, const int [], unsigned);
 
 /* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
Index: gcc/config/nvptx/nvptx.md
===
--- gcc/config/nvptx/nvptx.md	(revision 227174)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -1371,7 +1371,7 @@
 		   UNSPECV_LOCK)]
   
 {
-  nvptx_expand_oacc_lock_unlock (operands[0], true);
+  nvptx_expand_oacc_lock (operands[0], 0);
   DONE;
 })
 
@@ -1381,7 +1381,17 @@
 		   UNSPECV_LOCK)]
   
 {
-  nvptx_expand_oacc_lock_unlock (operands[0], false);
+  nvptx_expand_oacc_lock (operands[0], +1);
+  DONE;
+})
+
+(define_expand oacc_lock_init
+  [(unspec_volatile:SI [(match_operand:SI 0 const_int_operand )
+		(match_operand:SI 1 const_int_operand )]
+		   UNSPECV_LOCK)]
+  
+{
+  nvptx_expand_oacc_lock (operands[0], -1);
   DONE;
 })
 
@@ -1592,8 +1602,8 @@
   
   membar%B0;)
 
-;; spinlock and unlock
-(define_insn nvptx_spinlock
+;; spin lock and reset
+(define_insn nvptx_spin_lock
[(parallel
  [(unspec_volatile [(match_operand:SI 0 memory_operand m)
 			(match_operand:SI 1 const_int_operand i)]
@@ -1604,7 +1614,7 @@

%4:\\tatom%R1.cas.b32 %2,%0,0,1;setp.ne.u32 %3,%2,0;@%3 bra.uni %4;)
 
-(define_insn nvptx_spinunlock
+(define_insn nvptx_spin_reset
[(unspec_volatile [(match_operand:SI 0 memory_operand m)
 		  (match_operand:SI 1 const_int_operand i)]
 		  UNSPECV_LOCK)
Index: gcc/config/nvptx/nvptx.c
===
--- gcc/config/nvptx/nvptx.c	(revision 227174)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -1220,7 +1220,7 @@ nvptx_expand_oacc_join (unsigned mode)
gang or worker level.  */
 
 void
-nvptx_expand_oacc_lock_unlock (rtx src, bool lock)
+nvptx_expand_oacc_lock (rtx src, int direction)
 {
   unsigned HOST_WIDE_INT kind;
   rtx pat;
@@ -1230,22 +1230,26 @@ nvptx_expand_oacc_lock_unlock (rtx src,
 
   rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]);
   rtx space = GEN_INT (lock_space[kind]);
-  rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+  rtx barrier = NULL_RTX;
   rtx tmp = gen_reg_rtx (SImode);
 
-  if (!lock)
+  if (direction = 0)
+barrier = gen_nvptx_membar (GEN_INT (lock_level[kind]));
+
+  if (direction  0)
 emit_insn (barrier);
-  if (lock)
+  if (!direction)
 {
   rtx_code_label *label = gen_label_rtx ();
 
   LABEL_NUSES (label)++;
-  pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label);
+  pat = 

[Bug c++/67350] New: auto deduction error in variable template lambda

2015-08-25 Thread norbert.pfeiler+gcc.gnu.org/bugzilla at gmail dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67350

Bug ID: 67350
   Summary: auto deduction error in variable template lambda
   Product: gcc
   Version: 5.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: norbert.pfeiler+gcc.gnu.org/bugzilla at gmail dot com
  Target Milestone: ---

template<typename T>
auto test = [](){
return T{};
};

int main() {
test<int>();
}

error: use of 'test<int>' before deduction of 'auto'

Clang 3.6.2 accepts…

I’m not exactly sure this code is valid, but I can’t find much about variable
templates and lambdas.

Re: [AArch64] [TLSIE][2/2] Implement TLS IE for tiny model

2015-08-25 Thread Marcus Shawcroft
On 19 June 2015 at 10:15, Jiong Wang jiong.w...@arm.com wrote:

 Currently, TLS IE is supported on small model only. This patch implements
 TLS Initial-exec model support for AArch64 tiny memory model.

 Under tiny model, we only allow 1M loadable segment size, one single ldr
 instruction is enough for addressing the got entry for TLS IE directly.

 The code sequence is:

 A:  mrs  tp, tpidr_el0
 B0: ldr  t0, :gottprel:x1  R_AARCH64_TLSIE_LD_GOTTPREL_PREL19  x1
 B1: add  t0, t0, tp

 B0 and B1 should not be scheduled, as the pattern will be recognized
 later for linker IE model to LE model optimization.

 2015-06-19  Marcus Shawcroft marcus.shawcr...@arm.com
 Jiong Wang  jiong.w...@arm.com

 gcc/
   * config/aarch64/aarch64.md (UNSPEC_GOTTINYTLS): New UNSPEC.
   (tlsie_tiny_mode): New define_insn.
   (tlsie_tiny_sidi): Ditto.
   * config/aarch64/aarch64-protos.h (aarch64_symbol_type): Define
   SYMBOL_TINY_TLSIE.
   (aarch64_symbol_context): New comment for SYMBOL_TINY_TLSIE.
   * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Support
   SYMBOL_TINY_TLSIE.
   (aarch64_expand_mov_immediate): Ditto.
   (aarch64_print_operand): Ditto.
   (aarch64_classify_tls_symbol): Ditto.

 gcc/testsuite/
   * gcc.target/aarch64/tlsie_tiny.c: New testcase.

 --
 Regards,
 Jiong


OK /Marcus


[PATCH, PR other/67320] Fix wide add standard names

2015-08-25 Thread Michael Collison
The standard names for signed and unsigned vector wide adds are wrong in 
the documentation.


OK for trunk?

2015-08-25  Michael Collison  michael.colli...@linaro.org

PR other/67320
* doc/md.texi: Rename [su]sum_widen to widen_[su]sum to reflect correct
standard names

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 0bffdc6..619259f 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4946,10 +4946,10 @@ is of a wider mode, is computed and added to 
operand 3. Operand 3 is of a mode
 equal or wider than the mode of the absolute difference. The result is 
placed

 in operand 0, which is of the same mode as operand 3.

-@cindex @code{ssum_widen@var{m3}} instruction pattern
-@item @samp{ssum_widen@var{m3}}
-@cindex @code{usum_widen@var{m3}} instruction pattern
-@itemx @samp{usum_widen@var{m3}}
+@cindex @code{widen_ssum@var{m3}} instruction pattern
+@item @samp{widen_ssum@var{m3}}
+@cindex @code{widen_usum@var{m3}} instruction pattern
+@itemx @samp{widen_usum@var{m3}}
 Operands 0 and 2 are of the same mode, which is wider than the mode of
 operand 1. Add operand 1 to operand 2 and place the widened result in
 operand 0. (This is used express accumulation of elements into an 
accumulator


--
Michael Collison
Linaro Toolchain Working Group
michael.colli...@linaro.org



Re: [PATCH][AARCH64]Fix for branch offsets over 1 MiB

2015-08-25 Thread Andrew Pinski
On Tue, Aug 25, 2015 at 5:50 PM, Andrew Pinski pins...@gmail.com wrote:
 On Tue, Aug 25, 2015 at 5:37 PM, Andre Vieira
 andre.simoesdiasvie...@arm.com wrote:
 Conditional branches have a maximum range of [-1048576, 1048572]. Any
 destination further away can not be reached by these.
 To be able to have conditional branches in very large functions, we invert
 the condition and change the destination to jump over an unconditional
 branch to the original, far away, destination.

 gcc/ChangeLog:
 2015-08-07  Ramana Radhakrishnan  ramana.radhakrish...@arm.com
 Andre Vieira  andre.simoesdiasvie...@arm.com

 * config/aarch64/aarch64.md (*condjump): Handle functions > 1
 MiB.
 (*cb<optab><mode>1): Idem.
 (*tb<optab><mode>1): Idem.
 (*cb<optab><mode>1): Idem.
 * config/aarch64/iterators.md (inv_cb): New code attribute.
 (inv_tb): Idem.
 * config/aarch64/aarch64.c (aarch64_gen_far_branch): New.
 * config/aarch64/aarch64-protos.h (aarch64_gen_far_branch): New.

 gcc/testsuite/ChangeLog:
 2015-08-07  Andre Vieira  andre.simoesdiasvie...@arm.com

 * gcc.target/aarch64/long-branch.c: New test.

 Just a few comments about the testcase.  You could improve the size
 (on disk) of the testcase by using the preprocessor some more:
 Something like:
 #define CASE_ENTRY2 (x) CASE_ENTRY ((x)) CASE_ENTRY ((x)+1)
 #define CASE_ENTRY4 (x) CASE_ENTRY2 ((x)) CASE_ENTRY2 ((x)+2+1)
 #define CASE_ENTRY8 (x) CASE_ENTRY4 ((x)) CASE_ENTRY4 ((x)+4+1)
 #define CASE_ENTRY16 (x) CASE_ENTRY8 ((x)) CASE_ENTRY8 ((x)+8+1)
 #define CASE_ENTRY32 (x) CASE_ENTRY16 ((x)) CASE_ENTRY16 ((x)+16)
 #define CASE_ENTRY64 (x) CASE_ENTRY32 ((x)) CASE_ENTRY32 ((x)+32+1)
 #define CASE_ENTRY128 (x) CASE_ENTRY64 ((x)) CASE_ENTRY16 ((x)+64+1)
 #define CASE_ENTRY256 (x) CASE_ENTRY128 ((x)) CASE_ENTRY128 ((x)+128+1)


I do have an off by one error but you should get the idea.  Basically
instead of 200 lines, we only have 9 lines (log2(256) == 8).

Thanks,
Andrew


 And then use
 CASE_ENTRY256 (1)

 You can do the same trick to reduce the size of CASE_ENTRY too.

 Thanks,
 Andrew Pinski


[PATCH] Fix PR67306

2015-08-25 Thread Richard Biener

The following fixes ICEs due to the genmatch generated code for
GENERIC not verifying if builtin_decl_implicit returns non-NULL.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-08-25  Richard Biener  rguent...@suse.de

PR middle-end/67306
* genmatch.c (expr::gen_transform): Verify the result of
builtin_decl_implicit.
(dt_simplify::gen_1): Likewise.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 227058)
+++ gcc/genmatch.c  (working copy)
@@ -2177,11 +2216,19 @@ expr::gen_transform (FILE *f, int indent
fprintf_indent (f, indent, res = fold_build%d_loc (loc, %s, %s,
ops.length(), opr_name, type);
   else
-   fprintf_indent (f, indent, res = build_call_expr_loc (loc, 
-   builtin_decl_implicit (%s), %d, opr_name, 
ops.length());
+   {
+ fprintf_indent (f, indent, {\n);
+ fprintf_indent (f, indent,   tree decl = builtin_decl_implicit 
(%s);\n,
+ opr_name);
+ fprintf_indent (f, indent,   if (!decl) return NULL_TREE;\n);
+ fprintf_indent (f, indent,   res = build_call_expr_loc (loc, 
+ decl, %d, ops.length());
+   }
   for (unsigned i = 0; i  ops.length (); ++i)
fprintf (f, , ops%d[%u], depth, i);
   fprintf (f, );\n);
+  if (opr-kind != id_base::CODE)
+   fprintf_indent (f, indent, }\n);
   if (*opr == CONVERT_EXPR)
{
  indent -= 2;
@@ -3069,13 +3147,24 @@ dt_simplify::gen_1 (FILE *f, int indent,
*e-operation == CONVERT_EXPR
? NOP_EXPR : e-operation-id);
  else
-   fprintf_indent (f, indent,
-   res = build_call_expr_loc 
-   (loc, builtin_decl_implicit (%s), %d,
-   e-operation-id, e-ops.length());
+   {
+ fprintf_indent (f, indent,
+ {\n);
+ fprintf_indent (f, indent,
+   tree decl = builtin_decl_implicit 
(%s);\n,
+ e-operation-id);
+ fprintf_indent (f, indent,
+   if (!decl) return NULL_TREE;\n);
+ fprintf_indent (f, indent,
+   res = build_call_expr_loc 
+ (loc, decl, %d,
+ e-ops.length());
+   }
  for (unsigned j = 0; j  e-ops.length (); ++j)
fprintf (f, , res_op%d, j);
  fprintf (f, );\n);
+ if (!is_a operator_id * (opr))
+   fprintf_indent (f, indent, }\n);
}
}
}


[Bug middle-end/67005] [5/6 Regression] ICE: in verify_loop_structure, at cfgloop.c:1647 (loop with header n not in loop tree)

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67005

--- Comment #4 from Richard Biener rguenth at gcc dot gnu.org ---
Note we already do

/* If we made a BB unconditionally exit a loop then this
   transform alters the set of BBs in the loop.  Schedule
   a fixup.  */
if (loop_exit_edge_p (bb->loop_father, e))
  loops_state_set (LOOPS_NEED_FIXUP);
remove_edge (e2);

thus this would need to add sth like || e2->flags & IRREDUCIBLE_LOOP


[Bug tree-optimization/67323] Use non-unit stride loads by preference when applicable

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67323

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Status|UNCONFIRMED |ASSIGNED
   Last reconfirmed||2015-08-25
 CC|richard.guenther at gmail dot com  |rguenth at gcc dot 
gnu.org
 Depends on||66721
   Assignee|unassigned at gcc dot gnu.org  |rguenth at gcc dot 
gnu.org
 Ever confirmed|0   |1

--- Comment #1 from Richard Biener rguenth at gcc dot gnu.org ---
Confirmed.  We go down the SLP path here because the vectorizer thinks that
SLP is always cheaper than using interleaving (which generally is true
if there were not targets which can do the load plus interleave with
load-lanes ...).

I think this may be a regression as well because I enhanced SLP to apply
to way more cases.

Note that my plan is to make the vectorizer consider both (well, not really,
but this bug shows I maybe should try), SLP and non-SLP, and evaluate based
on costs which route to go.


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66721
[Bug 66721] [6 regression] gcc.target/i386/pr61403.c FAILs


Re: [PATCH][AARCH64]Fix for branch offsets over 1 MiB

2015-08-25 Thread Andrew Pinski
On Tue, Aug 25, 2015 at 5:37 PM, Andre Vieira
andre.simoesdiasvie...@arm.com wrote:
 Conditional branches have a maximum range of [-1048576, 1048572]. Any
 destination further away can not be reached by these.
 To be able to have conditional branches in very large functions, we invert
 the condition and change the destination to jump over an unconditional
 branch to the original, far away, destination.

 gcc/ChangeLog:
 2015-08-07  Ramana Radhakrishnan  ramana.radhakrish...@arm.com
 Andre Vieira  andre.simoesdiasvie...@arm.com

 * config/aarch64/aarch64.md (*condjump): Handle functions > 1
 MiB.
 (*cb<optab><mode>1): Idem.
 (*tb<optab><mode>1): Idem.
 (*cb<optab><mode>1): Idem.
 * config/aarch64/iterators.md (inv_cb): New code attribute.
 (inv_tb): Idem.
 * config/aarch64/aarch64.c (aarch64_gen_far_branch): New.
 * config/aarch64/aarch64-protos.h (aarch64_gen_far_branch): New.

 gcc/testsuite/ChangeLog:
 2015-08-07  Andre Vieira  andre.simoesdiasvie...@arm.com

 * gcc.target/aarch64/long-branch.c: New test.

Just a few comments about the testcase.  You could improve the size
(on disk) of the testcase by using the preprocessor some more:
Something like:
#define CASE_ENTRY2 (x) CASE_ENTRY ((x)) CASE_ENTRY ((x)+1)
#define CASE_ENTRY4 (x) CASE_ENTRY2 ((x)) CASE_ENTRY2 ((x)+2+1)
#define CASE_ENTRY8 (x) CASE_ENTRY4 ((x)) CASE_ENTRY4 ((x)+4+1)
#define CASE_ENTRY16 (x) CASE_ENTRY8 ((x)) CASE_ENTRY8 ((x)+8+1)
#define CASE_ENTRY32 (x) CASE_ENTRY16 ((x)) CASE_ENTRY16 ((x)+16)
#define CASE_ENTRY64 (x) CASE_ENTRY32 ((x)) CASE_ENTRY32 ((x)+32+1)
#define CASE_ENTRY128 (x) CASE_ENTRY64 ((x)) CASE_ENTRY16 ((x)+64+1)
#define CASE_ENTRY256 (x) CASE_ENTRY128 ((x)) CASE_ENTRY128 ((x)+128+1)

And then use
CASE_ENTRY256 (1)

You can do the same trick to reduce the size of CASE_ENTRY too.

Thanks,
Andrew Pinski


[Bug c++/67345] -Woverloaded-virtual false negative: Does not warn on overloaded virtual function

2015-08-25 Thread EisahLee at gmx dot de
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67345

--- Comment #2 from EisahLee at gmx dot de ---
I see the hiding as a potential design error, or however that is called: A
shortcoming of the way the methods were named.

Clang 4.5 does not warn until there is such a bad call.

Is there a compiler + flag combination that can provide such a warning?


[Bug tree-optimization/67306] Patterns ICEs when moved using simplify and match

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67306

Richard Biener rguenth at gcc dot gnu.org changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #4 from Richard Biener rguenth at gcc dot gnu.org ---
Fixed.


[Bug tree-optimization/67306] Patterns ICEs when moved using simplify and match

2015-08-25 Thread rguenth at gcc dot gnu.org
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67306

--- Comment #5 from Richard Biener rguenth at gcc dot gnu.org ---
Author: rguenth
Date: Tue Aug 25 10:29:09 2015
New Revision: 227163

URL: https://gcc.gnu.org/viewcvs?rev=227163&root=gcc&view=rev
Log:
2015-08-25  Richard Biener  rguent...@suse.de

PR middle-end/67306
* genmatch.c (expr::gen_transform): Verify the result of
builtin_decl_implicit.
(dt_simplify::gen_1): Likewise.

Modified:
trunk/gcc/ChangeLog
trunk/gcc/genmatch.c


Re: [PATCH 12/15][AArch64] Add vcvt(_high)?_f32_f16 intrinsics, with BE RTL fix

2015-08-25 Thread Alan Lawrence
James Greenhalgh wrote:

 -  VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
 +  VAR2 (UNOP, vec_unpacks_hi_, 10, v4sf, v8hf)

 Should this not use the appropriate BUILTIN_... iterator?

Indeed; BUILTIN_VQ_HSF it is.

VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
VAR1 (BINOP, float_truncate_hi_, 0, v8hf)

I could also use BUILTIN_VQ_HSF here (these two were added in a previous patch, 
before the VQ_HSF iterator was introduced). However, that goes against the 
principle that we should use the same iterator as the pattern (the pattern uses 
the Vdbl attribute of the VDF iterator), so I'm not sure whether that would 
be preferable (i.e. as a separate patch)?

 -  VAR1 (UNOP, float_extend_lo_, 0, v2df)
 +  VAR2 (UNOP, float_extend_lo_, 0, v2df, v4sf)

 Likewise.

Similarly, the required iterator does not exist, as float_extend_lo_ is named
after the Vwide attribute of the VDF iterator. The nearest equivalents I can
see use two VAR1's rather than a VAR2, so I've updated the patch to do that too.

OK with those two changes? (patch attached and bootstrapped+check-gcc on 
aarch64-none-linux-gnu)

Thanks, Alan
---
 gcc/config/aarch64/aarch64-simd-builtins.def |  3 +-
 gcc/config/aarch64/aarch64-simd.md   | 63 ++--
 gcc/config/aarch64/arm_neon.h| 16 +--
 gcc/config/aarch64/iterators.md  | 18 +---
 4 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index c5b46aa..2c13cfb 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -361,11 +361,12 @@
   BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
   BUILTIN_VDQF (UNOP, abs, 2)
 
-  VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
+  BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
   VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
   VAR1 (BINOP, float_truncate_hi_, 0, v8hf)
 
   VAR1 (UNOP, float_extend_lo_, 0, v2df)
+  VAR1 (UNOP, float_extend_lo_,  0, v4sf)
   BUILTIN_VDF (UNOP, float_truncate_lo_, 0)
 
   /* Implemented by aarch64_ld1VALL_F16:mode.  */
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index f8754cd..160acf9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1691,36 +1691,57 @@
 
 ;; Float widening operations.
 
-(define_insn vec_unpacks_lo_v4sf
-  [(set (match_operand:V2DF 0 register_operand =w)
-   (float_extend:V2DF
- (vec_select:V2SF
-   (match_operand:V4SF 1 register_operand w)
-   (parallel [(const_int 0) (const_int 1)])
- )))]
+(define_insn aarch64_simd_vec_unpacks_lo_mode
+  [(set (match_operand:VWIDE 0 register_operand =w)
+(float_extend:VWIDE (vec_select:VHALF
+  (match_operand:VQ_HSF 1 register_operand w)
+  (match_operand:VQ_HSF 2 vect_par_cnst_lo_half 
)
+   )))]
   TARGET_SIMD
-  fcvtl\\t%0.2d, %1.2s
+  fcvtl\\t%0.Vwtype, %1.Vhalftype
   [(set_attr type neon_fp_cvt_widen_s)]
 )
 
-(define_insn aarch64_float_extend_lo_v2df
-  [(set (match_operand:V2DF 0 register_operand =w)
-   (float_extend:V2DF
- (match_operand:V2SF 1 register_operand w)))]
+(define_expand vec_unpacks_lo_mode
+  [(match_operand:VWIDE 0 register_operand )
+   (match_operand:VQ_HSF 1 register_operand )]
   TARGET_SIMD
-  fcvtl\\t%0.2d, %1.2s
+  {
+rtx p = aarch64_simd_vect_par_cnst_half (MODEmode, false);
+emit_insn (gen_aarch64_simd_vec_unpacks_lo_mode (operands[0],
+  operands[1], p));
+DONE;
+  }
+)
+
+(define_insn aarch64_simd_vec_unpacks_hi_mode
+  [(set (match_operand:VWIDE 0 register_operand =w)
+(float_extend:VWIDE (vec_select:VHALF
+  (match_operand:VQ_HSF 1 register_operand w)
+  (match_operand:VQ_HSF 2 vect_par_cnst_hi_half 
)
+   )))]
+  TARGET_SIMD
+  fcvtl2\\t%0.Vwtype, %1.Vtype
   [(set_attr type neon_fp_cvt_widen_s)]
 )
 
-(define_insn vec_unpacks_hi_v4sf
-  [(set (match_operand:V2DF 0 register_operand =w)
-   (float_extend:V2DF
- (vec_select:V2SF
-   (match_operand:V4SF 1 register_operand w)
-   (parallel [(const_int 2) (const_int 3)])
- )))]
+(define_expand vec_unpacks_hi_mode
+  [(match_operand:VWIDE 0 register_operand )
+   (match_operand:VQ_HSF 1 register_operand )]
+  TARGET_SIMD
+  {
+rtx p = aarch64_simd_vect_par_cnst_half (MODEmode, true);
+emit_insn (gen_aarch64_simd_vec_unpacks_lo_mode (operands[0],
+  operands[1], p));
+DONE;
+  }
+)
+(define_insn aarch64_float_extend_lo_Vwide
+  [(set (match_operand:VWIDE 0 register_operand =w)
+   (float_extend:VWIDE
+ (match_operand:VDF 1 register_operand w)))]
   TARGET_SIMD
-  fcvtl2\\t%0.2d, %1.4s
+  fcvtl\\t%0Vmwtype, %1Vmtype
   [(set_attr type 

  1   2   >