date:20150528

[PATCH] unify -posix/-pthread cpp handling for gnu-user targets

2015-05-28 Thread Mike Frysinger

Many Linux targets duplicate the cpp spec macros for turning -posix/-pthread
into the right defines.  Some Linux targets forget to do this entirely and
can be hard to notice.  Add common definitions to the gnu headers (since
these are really in relation to the C library) and drop the duplications in
the target headers.

I've tested a few targets (aarch64, cris, x86_64) to make sure -dumpspecs
still includes the right content.

Some targets still define -posix/-pthread in SUBTARGET_CPP_SPEC and in
CPP_SUBTARGET_SPEC, but I can't seem to find any reference to either of
those defines.  Are they dead/confused code and I should just delete it ?

2015-05-28  Mike Frysinger  vap...@gentoo.org

* config/aarch64/aarch64-linux.h (CPP_SPEC): Delete.
* config/alpha/linux.h (CPP_SPEC): Delete.
* config/arm/linux-gas.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/cris/linux.h (CRIS_CPP_SUBTARGET_SPEC): Remove -pthread.
* config/gnu-user.h (GNU_USER_CPP_SPEC, CPP_SPEC): Define.
* config/gnu.h (GNU_USER_CPP_SPEC): Define.
(CPP_SPEC): Change to GNU_USER_CPP_SPEC.
* config/i386/gnu-user-common.h (CPP_SPEC): Delete.
* config/ia64/linux.h (CPP_SPEC): Delete.
* config/m32r/linux.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/m68k/linux.h (CPP_SPEC): Delete.
* config/microblaze/linux.h (CPP_SPEC): Delete.
* config/mips/gnu-user.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/mn10300/linux.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/nios2/linux.h (CPP_SPEC): Delete.
* config/pa/pa-linux.h (CPP_SPEC): Delete.
* config/s390/linux.h (CPP_SPEC): Delete.
* config/sh/linux.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/sparc/linux.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/sparc/linux64.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
* config/tilegx/linux.h (CPP_SPEC): Delete.
* config/tilepro/linux.h (CPP_SPEC): Delete.
* config/vax/linux.h (CPP_SPEC): Delete.
* config/xtensa/linux.h (SUBTARGET_CPP_SPEC): Change to
GNU_USER_CPP_SPEC.
---
 gcc/config/aarch64/aarch64-linux.h | 2 --
 gcc/config/alpha/linux.h   | 3 ---
 gcc/config/arm/linux-gas.h | 2 +-
 gcc/config/cris/linux.h| 6 ++
 gcc/config/gnu-user.h  | 5 +
 gcc/config/gnu.h   | 3 ++-
 gcc/config/i386/gnu-user-common.h  | 3 ---
 gcc/config/ia64/linux.h| 2 --
 gcc/config/m32r/linux.h| 5 ++---
 gcc/config/m68k/linux.h| 3 ---
 gcc/config/microblaze/linux.h  | 3 ---
 gcc/config/mips/gnu-user.h | 2 +-
 gcc/config/mn10300/linux.h | 6 +++---
 gcc/config/nios2/linux.h   | 3 ---
 gcc/config/pa/pa-linux.h   | 3 ---
 gcc/config/s390/linux.h| 2 --
 gcc/config/sh/linux.h  | 5 ++---
 gcc/config/sparc/linux.h   | 3 +--
 gcc/config/sparc/linux64.h | 5 +
 gcc/config/tilegx/linux.h  | 3 ---
 gcc/config/tilepro/linux.h | 3 ---
 gcc/config/vax/linux.h | 3 ---
 gcc/config/xtensa/linux.h  | 2 +-
 23 files changed, 21 insertions(+), 56 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-linux.h 
b/gcc/config/aarch64/aarch64-linux.h
index 1600a32..7d356dd 100644
--- a/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc/config/aarch64/aarch64-linux.h
@@ -32,8 +32,6 @@
 #undef  CC1_SPEC
 #define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC
 
-#define CPP_SPEC %{pthread:-D_REENTRANT}
-
 #define LINUX_TARGET_LINK_SPEC  %{h*} \
%{static:-Bstatic}  \
%{shared:-shared}   \
diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h
index 475ea06..e609f38 100644
--- a/gcc/config/alpha/linux.h
+++ b/gcc/config/alpha/linux.h
@@ -39,9 +39,6 @@ along with GCC; see the file COPYING3.  If not see
%{shared:-lc} \
%{!shared: %{profile:-lc_p}%{!profile:-lc}}
 
-#undef CPP_SPEC
-#define CPP_SPEC %{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}
-
 /* Show that we need a GP when profiling.  */
 #undef TARGET_PROFILING_NEEDS_GP
 #define TARGET_PROFILING_NEEDS_GP 1
diff --git a/gcc/config/arm/linux-gas.h b/gcc/config/arm/linux-gas.h
index d3a3196..6ce62c7 100644
--- a/gcc/config/arm/linux-gas.h
+++ b/gcc/config/arm/linux-gas.h
@@ -31,7 +31,7 @@
 #define DEFAULT_SIGNED_CHAR 0
 
 #undef  SUBTARGET_CPP_SPEC
-#define SUBTARGET_CPP_SPEC  %{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}
+#define SUBTARGET_CPP_SPEC GNU_USER_CPP_SPEC
 
 #undef  SIZE_TYPE
 #define SIZE_TYPE unsigned int
diff --git a/gcc/config/cris/linux.h b/gcc/config/cris/linux.h
index 262aac5..043a1ac 100644
--- a/gcc/config/cris/linux.h
+++ b/gcc/config/cris/linux.h
@@ -54,12 +54,10 @@ along with GCC; see the file COPYING3.  If

Re: [PATCH] Extend -fno-plt to normal non-PIC branches on x86

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 10:37:53AM -0700, H.J. Lu wrote:
 This patch extends -fno-plt to normal non-PIC calls on x86.  -fno-plt
 works in 64-bit mode with the existing binutils.  For 32-bit, we need
 the updated assembler and linker to support call/jmp *foo@GOT with
 a new relocation different from R_386_GOT32 to indicate that this
 relocation applies to indirect branches.  A configure time check is
 added to verify that 32-bit assembler generates a known relocation
 which is different from R_386_GOT32.  A new 32-bit relocation is needed
 since call/jmp *foo@GOT requires a different relocation from R_386_GOT32
 which is used together with a GOT register in call/jmp *foo@GOT(%reg).
 
 OK for master?
 
 Thanks.
 
 H.J.
 ---
   * configure.ac (HAVE_AS_INDIRECT_BRANCH_VIA_GOT): New.  Defined
   if 32-bit assembler generates a known relocation which is
   different from R_386_GOT32.
   * config.in: Regenerated.
   * configure: Likewise.
   * config/i386/i386.c (ix86_output_call_insn):  Extend -fno-plt
   to normal non-PIC branches.


Here is the updated patch to properly handle local functions with
testcases.


H.J.
---
gcc/

* configure.ac (HAVE_AS_INDIRECT_BRANCH_VIA_GOT): New.  Defined
if 32-bit assembler generates a known relocation which is
different from R_386_GOT32.
* config.in: Regenerated.
* configure: Likewise.
* config/i386/i386.c (ix86_output_call_insn):  Extend -fno-plt
to normal non-PIC branches.
* config/i386/i386.h (TARGET_ELF): New.

gcc/testsuite/

* gcc.target/i386/pr66232-6.c: New tests.
* gcc.target/i386/pr66232-7.c: Likewise.
* gcc.target/i386/pr66232-8.c: Likewise.
* gcc.target/i386/pr66232-9.c: Likewise.
* gcc.target/i386/pr66232-10.c: Likewise.
* gcc.target/i386/pr66232-11.c: Likewise.
* gcc.target/i386/pr66232-12.c: Likewise.
* gcc.target/i386/pr66232-13.c: Likewise.
* lib/target-supports.exp (check_effective_target_branch_via_got):
New.
---
 gcc/config.in  | 14 +---
 gcc/config/i386/i386.c | 40 +--
 gcc/config/i386/i386.h |  2 ++
 gcc/configure  | 47 ++-
 gcc/configure.ac   | 18 ++-
 gcc/testsuite/gcc.target/i386/pr66232-10.c | 13 
 gcc/testsuite/gcc.target/i386/pr66232-11.c | 14 
 gcc/testsuite/gcc.target/i386/pr66232-12.c | 13 
 gcc/testsuite/gcc.target/i386/pr66232-13.c | 13 
 gcc/testsuite/gcc.target/i386/pr66232-6.c  | 13 
 gcc/testsuite/gcc.target/i386/pr66232-7.c  | 14 
 gcc/testsuite/gcc.target/i386/pr66232-8.c  | 13 
 gcc/testsuite/gcc.target/i386/pr66232-9.c  | 13 
 gcc/testsuite/lib/target-supports.exp  | 52 ++
 14 files changed, 271 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66232-9.c

diff --git a/gcc/config.in b/gcc/config.in
index daaf906..0ee5c38 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -363,6 +363,12 @@
 #endif
 
 
+/* Define true if the assembler supports 'call *foo@GOT'. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+#endif
+
+
 /* Define if your assembler supports the Sun syntax for cmov. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_IX86_CMOV_SUN_SYNTAX
@@ -686,8 +692,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'basename', otherwise define to
-   0. */
+/* Define to 1 if you have the declaration of `basename(const char*)', and to
+   0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_BASENAME
 #endif
@@ -963,8 +969,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'strstr', otherwise define to 0.
-   */
+/* Define to 1 if you have the declaration of `strstr(const char*,const
+   char*)', and to 0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_STRSTR
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e77cd04..63ebc7f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25611,7 +25611,25 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = %!jmp\t%P0;
+   {
+ if (!SYMBOL_REF_LOCAL_P (call_op)
+  !flag_plt
+  !flag_pic
+  TARGET_ELF)
+   {
+ /* Avoid PLT.  */
+ if (TARGET_64BIT)
+

Re: debug mode maintenance patch

2015-05-28 Thread François Dumont


On 25/05/2015 20:41, Jonathan Wakely wrote:

On 25/05/15 15:31 +0200, François Dumont wrote:

Hi

   This is a patch to clean the debug mode code.

   I have introduced a new debug header, assertions.h, so that 
headers that only need _GLIBCXX_DEBUG_ASSERT do not have to include 
the big debug.h. I also introduce functions.tcc to isolate 
implementation of __foreign_iterator which requires a number of other 
headers.


All other uses of .tcc extensions are headers included automatically
at the bottom of the corresponding .h header, so other headers never
need to do #include foo.tcc because that appears at the end of
foo.h (or foo for standard headers).

Also, .tcc is meant to be for definitions of non-inline templates that
are declared (but not defined) in the header that includes the .tcc
file, but all the functions you're moving to debug/functions.tcc are
still small and inline.

If the point is just to move some functions to a different header
because not all consumers of functions.h need those functions then I
think it should have a different name, not pretend to be related to
functions.h


Ok, then I kept __foreign_iterator within functions.h. I checked and 
none of the headers included are themselves including debug headers, as 
long as it is like that it is fine.




I'm not convinced moving them to a separate header is even a good
idea. Surely most headers that include debug/functions.h already end
up including bits/move.h and type_traits anyway?



   * include/debug/debug.h ([_GLIBCXX_DEBUG_ASSERT,
   _GLIBCXX_DEBUG_PEDASSERT, _GLIBCXX_DEBUG_ONLY]): Move definition...


These names should not be in square brackets (square brackets are used
to indicate conditional changes, see
http://www.gnu.org/prep/standards/html_node/Conditional-Changes.html)


Sorry, I saw it used so many times for macros that I thought it was the 
right way to report macro modifications.


I also replicate Copyrights from debug.h to assertions.h.

* include/debug/debug.h (_GLIBCXX_DEBUG_ASSERT,
_GLIBCXX_DEBUG_PEDASSERT, _GLIBCXX_DEBUG_ONLY): Move definition...
* include/debug/assertions.h: ...here, new.
* include/debug/formatter.h
(struct _Error_formatter::_Is_iterator_value_type): New.
(struct _Error_formatter::_Is_instance): New.
(struct _Error_formatter::_Parameter): Make public and not friend
anymore.
(_Error_formatter::_Parameter::__instance): New _M_kind enum entry.
(_Error_formatter::_Parameter::__iterator_value_type): New _M_kind enum
entry.
(struct _Error_formatter::_Parameter::_Type): New.
(struct _Error_formatter::_Parameter::_Instance): New, inherit from
latter.
(union _Error_formatter::_Parameter::_M_variant): Reorganize.
(_Parameter(_Iterator const, const char*, _Is_iterator)): Make all
overloads take iterator through a const reference.
(_Parameter(const _Iterator, const char*, _Is_iterator_value_type)):
New.
(_Parameter(const _Type, const char*, _Is_instance)): New.
(_Error_formatter::_M_print_type): Delete.
(_Error_formatter::_M_iterator_value_type): New.
(_Error_formatter::_M_instance): New.
* include/Makefile.am: Add new above debug file.
* include/Makefile.in: Regenerate.
* include/debug/functions.h
(__check_dereferenceable(const _Safe_iterator),
__valid_range(const _Safe_iterator),
struct __is_safe_random_iterator_Safe_iterator): Move...
* include/debug/safe_iterator.h: ... here.
Replace debug.h include with assertions.h.
(__check_singular_aux): Move...
* include/debug/safe_base.h: ... here.
* include/debug/functions.h
(__check_dereferenceable(const _Safe_local_iterator),
__valid_range(const _Safe_local_iterator): Move...
* include/debug/safe_local_iterator.h: ...here.
* include/debug/safe_sequence.h: Replace debug.h with assertions.h.
Remove _Safe_iterator declaration.
* include/debug/safe_unordered_container.h: Replace debug.h with
assertions.h.
* include/debug/array: Replace safe_sequence.h include with
formatter.h and macros.h.
* include/debug/deque: Include functions.tcc.
* include/debug/forward_list: Likewise.
* include/debug/list: Likewise.
* include/debug/string: Likewise.
* include/debug/vector: Likewise.
* include/bits/unique_ptr.h: Replace debug.h include with new
assertions.h.
* include/bits/stl_iterator_base_funcs.h: Likewise.
* testsuite/23_containers/array/tuple_interface/get_debug_neg.cc:
Adjust dg-error line number.
* testsuite/23_containers/array/tuple_interface/
tuple_element_debug_neg.cc: Likewise.
* src/c++11/debug.cc: Adapt.

Tested under Linux x86_64.

Ok to commit ?

François

Index: include/Makefile.am
===
--- include/Makefile.am	(revision 223846)
+++ include/Makefile.am	(working copy)
@@ -759,6 +759,7 @@
 debug_builddir = ./debug
 debug_headers = \
 	${debug_srcdir}/array \
+

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Jan Hubicka

 On 05/28/2015 04:42 PM, Jan Hubicka wrote:
 As for optimization changing type representation, I suppose one case is when
 function with varray type gets inlined and the array bound happens to be a
 different expression afterwards.  We produce a new copy of the original type
 with different bounds then.
 
 That makes sense, but that would be a new type rather than
 modifications to the old type.

Yep, that is all I can think of :)
Somehow the early dwarf needs to bind to the sizes that are represented as 
gimple
registers.  How is this case handled with early debug?

Honza
 
 Jason

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Jason Merrill


On 05/28/2015 02:53 PM, Aldy Hernandez wrote:

On 05/27/2015 08:39 AM, Jason Merrill wrote:

On 05/20/2015 11:50 AM, Aldy Hernandez wrote:



+  /* Fill in the size of variable-length fields in late dwarf.  */
+  if (TREE_ASM_WRITTEN (type)
+   !early_dwarf_dumping)
+{
+  tree member;
+  for (member = TYPE_FIELDS (type); member; member = DECL_CHAIN
(member))
+fill_variable_array_bounds (TREE_TYPE (member));
+  return;
+}


Why is this happening in late dwarf?  I'm concerned that front-end
information that is necessary to do this might be lost by that point.


I thought only after the optimizations had run their course would we be
guaranteed to have accurate bound information.  At least, that's what my
experience showed.


Hmm, I don't know why optimizations would change the representation of 
the array type.



+  /* Variable-length types may be incomplete even if
+ TREE_ASM_WRITTEN.  For such types, fall through to
+ gen_array_type_die() and possibly fill in
+ DW_AT_{upper,lower}_bound attributes.  */
+  if ((TREE_CODE (type) != ARRAY_TYPE
+TREE_CODE (type) != RECORD_TYPE
+TREE_CODE (type) != UNION_TYPE
+TREE_CODE (type) != QUAL_UNION_TYPE)
+  || (TYPE_SIZE (type)
+   TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST))


Similarly, why check for INTEGER_CST here?


The INTEGER_CST check was supposed to mean we have bound information
already, no need to look further.

I guess we could have a variable length bound that does not decay to a
constant.


Right.  I would expect that to usually be the case with VLAs.


Perhaps I could check the presence of a cached DIE with a
type DIE containing a DW_TAG_subrange_type *and*
DW_AT_{lower,upper}_bound ??.  Basically I just want to add bound
information, if available and not already present.

Suggestions?


I'm still not sure why we can't just emit bound info in early dwarf. 
Can you be more specific about the optimization thing?


Jason

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 1:54 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

 You are right, it doesn't work.  I have attached the patch with the
 changes you mentioned.


Since direct_p is true, do we need this check?

+  if (GET_CODE (call_op) != SYMBOL_REF
+  || SYMBOL_REF_LOCAL_P (call_op))
+return false;

H.J.

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam

On Thu, May 28, 2015 at 2:01 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 1:54 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

 You are right, it doesn't work.  I have attached the patch with the
 changes you mentioned.


 Since direct_p is true, do we need this check?

 +  if (GET_CODE (call_op) != SYMBOL_REF
 +  || SYMBOL_REF_LOCAL_P (call_op))
 +return false;

We do need it right because  for this case below, I do not want an
indirect call:

__attribute__((noplt))
int foo() {
  return 0;
}

int main()
{
  return foo();
}

Assuming foo is not inlined, if I remove the lines you mentioned, I
will get an indirect call which is unnecessary.

Thanks
Sri


 H.J.

Re: [PR65768] Check rtx_cost when propagating constant

2015-05-28 Thread Jeff Law


On 05/13/2015 11:46 PM, Kugan wrote:

ping?

Thanks,
Kugan

On 15/04/15 17:53, Kugan wrote:

As mentioned in PR65768, ARM gcc generates suboptimal code for constant
Uses in loop. Part of the reason is cprop is undoing what loop invariant
code motion did.

Zhenqiang posted a patch to fix this based on rtx costs:
https://gcc.gnu.org/ml/gcc-patches/2014-06/msg01321.html

I cleaned it up and bootstrapped, regression tested on x86_64-linux-gnu;
no new regressions. Is this OK for trunk?

Thanks,
Kugan

gcc/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  kug...@linaro.org
Zhenqiang Chen  zhenqiang.c...@linaro.org

PR target/65768
* cprop.c (try_replace_reg): Check cost of constants before propagating.

I should have also noted, a fresh bootstrap & regression test is needed too.

jeff

Re: [PR65768] Check rtx_cost when propagating constant

2015-05-28 Thread Jeff Law

I've CC'd Ilya as he's been looking at related issues in the x86 
backend, but from the other direction and I think he ought to be aware 
of the interactions of this potential change and his work.  In 
particular depending on the costing in the x86 backend we may see fewer 
propagations of GOTOFF constants to their use sites.




On 05/13/2015 11:46 PM, Kugan wrote:

ping?

Thanks,
Kugan

On 15/04/15 17:53, Kugan wrote:

As mentioned in PR65768, ARM gcc generates suboptimal code for constant
Uses in loop. Part of the reason is cprop is undoing what loop invariant
code motion did.

Zhenqiang posted a patch to fix this based on rtx costs:
https://gcc.gnu.org/ml/gcc-patches/2014-06/msg01321.html

I cleaned it up and bootstrapped, regression tested on x86_64-linux-gnu;
no new regressions. Is this OK for trunk?

Thanks,
Kugan

gcc/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  kug...@linaro.org
Zhenqiang Chen  zhenqiang.c...@linaro.org

PR target/65768
* cprop.c (try_replace_reg): Check cost of constants before propagating.
So, I've reviewed the discussion from last year.  To summarize my 
understanding (please correct me if I'm wrong):


For various reasons we can have out-of-range constants for arithmetic, 
logical or other operations.  Those out-of-range constants will 
typically be loaded into a register so that we can create valid insns.


LICM (and code motion in general) may hoist the constant register loads 
out of loops, which we generally consider a win (there's certainly cases 
where it is not though).  It's particularly helpful when the constant 
can be used by many instructions.


Global constant propagation may then try to replace uses of the constant 
by the constant itself.  Some of those propagations create valid insns, 
but insns with a higher cost than their prior form.  This is effectively 
undoing LICM.


The patch changes the constant propagator to check the rtx cost of the 
original form vs the propagated form and only propagates if the cost is 
the same or lower -- the obvious idea being to propagate the constant 
only when it saves us cycles.


Please correct me if I've got the overall summary incorrect.

There were several small issues raised that are probably worth a bit of 
further discussion.


Register pressure.  This patch can increase register pressure.  It 
happens if, prior to this patch the constant was propagated to all the 
use sites.  In that case the pseudo holding the constant is dead and 
gets eliminated.  With this patch we may decline to propagate the 
constant to the use site (due to cost) and as a result the pseudo 
remains live, thus increasing register pressure.


Based on Kugan's data, I don't see that as a major problem in practice. 
 Though Ilya might have specific cases for i686 PIC where it's a bigger 
concern.


Performance.  There wasn't a big win with this patch on either tested 
architecture -- which is no great surprise.  We're talking about very 
small cost differences, possibly differences that can be well hidden by 
modern pipelines.


General concerns about using rtx costing.  What Kugan is doing here is 
very similar to what's being done in other rtl passes WRT checking costs 
before making transformations.  So I don't see that as a significant 
reason to object to the patch.



WRT the patch itself.

The const_p variable is poorly named, though I can kind of see how you 
settled on it.  Maybe check_rtx_costs or something along those lines 
would be better.


The comment for the second hunk would probably be better as:

/* If TO is a constant, check the cost of the set after propagation
   to the cost of the set before the propagation.  If the cost is
   higher, then do not replace FROM with TO.  */


You should try to produce a testcase where this change shows a code 
generation improvement.  Given we're checking target costs, that test 
will naturally be target specific.  But please do try.


So with the two nits fixed and a testcase, I think this can go forward.

jeff

Re: [PATCH, PR target/65103, 2/3] Propagate address constants into loops for i386

2015-05-28 Thread Jeff Law


On 05/05/2015 05:05 AM, Ilya Enkovich wrote:

2015-04-21 8:52 GMT+03:00 Jeff Law l...@redhat.com:

On 04/17/2015 02:34 AM, Ilya Enkovich wrote:


On 15 Apr 14:07, Ilya Enkovich wrote:


2015-04-14 8:22 GMT+03:00 Jeff Law l...@redhat.com:


On 03/15/2015 02:30 PM, Richard Sandiford wrote:



Ilya Enkovich enkovich@gmail.com writes:



This patch allows propagation of loop invariants for i386 if
propagated
value is a constant to be used in address operand.  Bootstrapped and
tested on x86_64-unknown-linux-gnu.  OK for trunk or stage 1?




Is it necessary for this to be a target hook?  The concept doesn't seem
particularly target-specific.  We should only propagate into the
address
if the new cost is no greater than the old cost, but if the address
meets that condition and if propagating at this point in the pipeline
is
a win on x86, then wouldn't it be a win for other targets too?



I agree with Richard here.  I can't see a strong reason why this should
be a
target hook.

Perhaps part of the issue here is the address costing metrics may not
have
enough context to make good decisions.  In which case what context do
they
need?



At this point I don't insist on a target hook.  The main reasoning was
to not affect other targets. If we extend propagation for non constant
values different aspects may appear. E.g. possible register pressure
changes may significantly affect ia32. I just wanted to have an
instrument to play with a propagation on x86 not affecting other
targets. I don't have an opportunity to test possible performance
implications on non-x86 targets. Don't expect (significant)
regressions there but who knows...

I'll remove the hook from this patch. Will probably introduce it later
if some target specific cases are found.

Thanks,
Ilya



Jeff



Here is a version with no hook.  Bootstrapped and tested on
x86_64-unknown-linux-gnu.  Is it OK for trunk?

Thanks,
Ilya
--
gcc/

2015-04-17  Ilya Enkovich  ilya.enkov...@intel.com

 PR target/65103
 * fwprop.c (forward_propagate_into): Propagate loop
 invariants if a target says so.

gcc/testsuite/

2015-04-17  Ilya Enkovich  ilya.enkov...@intel.com

 PR target/65103
 * gcc.target/i386/pr65103-2.c: New.


It seems to me there's a key piece missing here -- metrics.

When is this profitable, when is it not profitable.   Just blindly undoing
LICM seems wrong here.

The first thought is to look at register pressure through the loop.  I
thought we had some infrastructure for this kind of query available. It'd
probably be wise to re-use it.  In fact, one might reasonably ask if LICM
should have hoisted the expression to start with.


I'd also think the cost of the constant may come into play here.  A really
cheap constant probably should not have been hoisted by LICM to start with
-- but the code may have been written in such a way that some low cost
constants are pulled out as loop invariants at the source level.  So this
isn't strictly an issue of un-doing bad LICM

So I think to go forward we need to be working on solving the "when is this
a profitable transformation to make" question.


This patch doesn't force propagation.  The patch just allows
propagation and regular fwprop cost estimation is used to compute if
this is profitable.  For i386 I don't see cases when we shouldn't
propagate. We remove instruction, reduce register pressure and having
constant in memory operand is free which is reflected in address_cost
hook.
Right, but you're blindly propagating.  The right thing to do is look at 
some kind of metric to estimate when it's profitable to propagate the 
constant back in vs leave it hoisted out.


If you look at what Kugan is doing in cprop.c, that's exactly the 
approach he's taking -- looking at rtx costing to determine when to 
propagate the constant back into the loop.  It could probably be made 
better with some knowledge of register pressure and looking at whether 
or not all uses (vs just some uses) of the constant will be propagated.


Jeff

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Jason Merrill


On 05/28/2015 04:42 PM, Jan Hubicka wrote:

As for optimization changing type representation, I suppose one case is when
function with varray type gets inlined and the array bound happens to be a
different expression afterwards.  We produce a new copy of the original type
with different bounds then.


That makes sense, but that would be a new type rather than modifications 
to the old type.


Jason

arm memcpy of aligned data

2015-05-28 Thread Mike Stump

So, the arm memcpy code of aligned data isn’t as good as it can be.

void *memcpy(void *dest, const void *src, unsigned int n);

void foo(char *dst, int i) {
  memcpy (dst, &i, sizeof (i));
}

generates horrible code, but, if we are willing to notice the src or the 
destination are aligned, we can do much better:

$ ./cc1 -fschedule-fusion -fdump-tree-all-all -da -march=armv7ve 
-mcpu=cortex-m4 -fomit-frame-pointer -quiet -O2 /tmp/t.c -o t.s
$ cat t.s
[ … ]
foo:
@ args = 0, pretend = 0, frame = 4
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
sub sp, sp, #4
str r1, [r0]@ unaligned
add sp, sp, #4

Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c(revision 223842)
+++ gcc/config/arm/arm.c(working copy)
@@ -14376,7 +14376,10 @@ arm_block_move_unaligned_straight (rtx d
srcoffset + j * UNITS_PER_WORD - src_autoinc);
  mem = adjust_automodify_address (srcbase, SImode, addr,
   srcoffset + j * UNITS_PER_WORD);
- emit_insn (gen_unaligned_loadsi (regs[j], mem));
+ if (src_aligned)
+   emit_move_insn (regs[j], mem);
+ else
+   emit_insn (gen_unaligned_loadsi (regs[j], mem));
}
   srcoffset += words * UNITS_PER_WORD;
 }
@@ -14395,7 +14398,10 @@ arm_block_move_unaligned_straight (rtx d
dstoffset + j * UNITS_PER_WORD - dst_autoinc);
  mem = adjust_automodify_address (dstbase, SImode, addr,
   dstoffset + j * UNITS_PER_WORD);
- emit_insn (gen_unaligned_storesi (mem, regs[j]));
+ if (dst_aligned)
+   emit_move_insn (mem, regs[j]);
+ else
+   emit_insn (gen_unaligned_storesi (mem, regs[j]));
}
   dstoffset += words * UNITS_PER_WORD;
 }


Ok?

Can someone spin this through an arm test suite run for me, I was doing this by 
inspection and cross compile on a system with no arm bits.  Bonus points if you 
can check it in with the test case above marked up as appropriate.

Index: gcc/config/arm/arm.c
===
--- gcc/config/arm/arm.c(revision 223842)
+++ gcc/config/arm/arm.c(working copy)
@@ -14376,7 +14376,10 @@ arm_block_move_unaligned_straight (rtx d
srcoffset + j * UNITS_PER_WORD - src_autoinc);
  mem = adjust_automodify_address (srcbase, SImode, addr,
   srcoffset + j * UNITS_PER_WORD);
- emit_insn (gen_unaligned_loadsi (regs[j], mem));
+ if (src_aligned)
+   emit_move_insn (regs[j], mem);
+ else
+   emit_insn (gen_unaligned_loadsi (regs[j], mem));
}
   srcoffset += words * UNITS_PER_WORD;
 }
@@ -14395,7 +14398,10 @@ arm_block_move_unaligned_straight (rtx d
dstoffset + j * UNITS_PER_WORD - dst_autoinc);
  mem = adjust_automodify_address (dstbase, SImode, addr,
   dstoffset + j * UNITS_PER_WORD);
- emit_insn (gen_unaligned_storesi (mem, regs[j]));
+ if (dst_aligned)
+   emit_move_insn (mem, regs[j]);
+ else
+   emit_insn (gen_unaligned_storesi (mem, regs[j]));
}
   dstoffset += words * UNITS_PER_WORD;
 }

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 2:27 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 2:01 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 1:54 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

 You are right, it doesn't work.  I have attached the patch with the
 changes you mentioned.


 Since direct_p is true, do we need

 +  if (GET_CODE (call_op) != SYMBOL_REF
 +  || SYMBOL_REF_LOCAL_P (call_op))
 +return false;

 We do need it right because  for this case below, I do not want an
 indirect call:

 __attribute__((noplt))
 int foo() {
   return 0;
 }

 int main()
 {
   return foo();
 }

 Assuming foo is not inlined, if I remove the lines you mentioned, I
 will get an indirect call which is unnecessary.


I meant the GET_CODE (call_op) != SYMBOL_REF part isn't
needed.



-- 
H.J.

Re: [PATCH, FT32] initial support

2015-05-28 Thread Eric Botcazou

 Thanks very much. ChangeLog entry:
 
 2015-05-14  James Bowman  james.bow...@ftdichip.com
 
 * configure.ac: FT32 target added
 * libgcc/config.host: FT32 target added
 * gcc/config/ft32/: FT32 target added
 * libgcc/config/ft32/: FT32 target added
 * gcc/doc/install.texi, invoke.texi, md.texi: FT32 details added
 * gcc/doc/contrib.texi: self added
 * contrib/config-list.mk: FT32 target added
 * configure: Regenerated

That's wrong, you cannot add just a single entry to the toplevel ChangeLog, 
you need to add entries to the ChangeLog of every subdirectory instead: gcc, 
libgcc, contrib, etc.  Please fix.

-- 
Eric Botcazou

Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Jan Hubicka

hello,
only providing you the testcase why I need transitive closure of contains
pointer via the extra child I noticed that there is extra symmetry to handle:

 struct a {void *ptr;}
 char **ptr = (char **)a.ptr;
 ptr = ...

This one doesn't really fly with my extra subset code, because ptr is not
universal pointer, but struct a contains one and thus should conflict with
every pointer.  Adding every pointer as subset of every structure with
universal pointer is impractical (children of those structures would be appearing
as new pointer types get alias sets) and thus indeed it is better to handle it
same way as alias set 0 - by a special case in alias_set_subset_of
and alias_sets_conflict_p.

So I added the second flag - has_pointer that is transitive closure of
is_pointer and added the special case to alias_sets_conflict_p instead of 
adding the extra subset relation into the DAG.

I also added statistics and made changes you suggested (making child
hash to be possibly NULL and cleaning up alias set conflict construction)

I also constructed a testcase that covers all the new code paths.

The patch bootstrapped/regtested ppc64-linux.  I am not bound to teaching
next week, so if I hear no negative comments, I will schedule committing the
patch for weekend to deal with possible fallout.

There are few cleanups possible incrementally - i.e. the hash set seems
irrationally large for average type, we could avoid some pointer travelling
overhead and we could also do better at alias_sets_must_conflict_p.

Honza

* alias.c (alias_set_entry_d): Add is_pointer and has_pointer.
(alias_stats): Add num_universal.
(alias_set_subset_of): Special case pointers; be ready for NULL
children.
(alias_sets_conflict_p): Special case pointers; be ready for NULL
children.
(init_alias_set_entry): Break out from ...
(record_alias_subset): ... here; propagate new fields;
allocate children only when really needed.
(get_alias_set): Do less generous pointer globbing.
(dump_alias_stats_in_alias_c): Update statistics.
* gcc.dg/alias-8.c: Do not xfail.
* gcc.dg/pr62167.c: Prevent FRE.
* gcc.dg/alias-14.c: New testcase.
Index: alias.c
===
--- alias.c (revision 223772)
+++ alias.c (working copy)
@@ -183,10 +184,6 @@ struct GTY(()) alias_set_entry_d {
   /* The alias set number, as stored in MEM_ALIAS_SET.  */
   alias_set_type alias_set;
 
-  /* Nonzero if would have a child of zero: this effectively makes this
- alias set the same as alias set zero.  */
-  int has_zero_child;
-
   /* The children of the alias set.  These are not just the immediate
  children, but, in fact, all descendants.  So, if we have:
 
@@ -195,6 +192,17 @@ struct GTY(()) alias_set_entry_d {
  continuing our example above, the children here will be all of
  `int', `double', `float', and `struct S'.  */
   hash_mapint, int, alias_set_traits *children;
+
+  /* Nonzero if would have a child of zero: this effectively makes this
+ alias set the same as alias set zero.  */
+  bool has_zero_child;
+  /* Nonzero if alias set corresponds to pointer type itself (i.e. not to
+ aggregate contaiing pointer.
+ This is used for a special case where we need an universal pointer type
+ compatible with all other pointer types.  */
+  bool is_pointer;
+  /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
+  bool has_pointer;
 };
 typedef struct alias_set_entry_d *alias_set_entry;
 
@@ -222,6 +230,7 @@ static struct {
   unsigned long long num_same_objects;
   unsigned long long num_volatile;
   unsigned long long num_dag;
+  unsigned long long num_universal;
   unsigned long long num_disambiguated;
 } alias_stats;
 
@@ -454,18 +463,58 @@ mems_in_disjoint_alias_sets_p (const_rtx
 bool
 alias_set_subset_of (alias_set_type set1, alias_set_type set2)
 {
-  alias_set_entry ase;
+  alias_set_entry ase2;
 
   /* Everything is a subset of the aliases everything set.  */
   if (set2 == 0)
 return true;
 
-  /* Otherwise, check if set1 is a subset of set2.  */
-  ase = get_alias_set_entry (set2);
-  if (ase != 0
-   (ase-has_zero_child
- || ase-children-get (set1)))
+  /* Check if set1 is a subset of set2.  */
+  ase2 = get_alias_set_entry (set2);
+  if (ase2 != 0
+   (ase2-has_zero_child
+ || (ase2-children  ase2-children-get (set1
 return true;
+
+  /* As a special case we consider alias set of void * to be both subset
+ and superset of every alias set of a pointer.  This extra symmetry does
+ not matter for alias_sets_conflict_p but it makes 
aliasing_component_refs_p
+ to return true on the following testcase:
+
+ void *ptr;
+ char **ptr2=(char **)ptr;
+ *ptr2 = ...
+
+ Additionally if a set contains universal pointer, we consider every 
pointer
+ to be a subset of it, but we do not

Re: [patch] fix bootstrap on FreeBSD i386/arm

2015-05-28 Thread Jason Merrill


OK, thanks.

Jason

[patch] libstdc++/66327 don't pass null pointers to memcmp

2015-05-28 Thread Jonathan Wakely


std::equal((int*)0, (int*)0, p) and std::equal(p, p, (int*)0) are
valid for any input iterator p, and must not pass a null pointer to
memcmp.

Similarly, std::lexicographical_compare((int*)0, (int*)0, p, q) and
std::lexicographical_compare(p, q, (int*)0, (int*)0) are valid for any
input iterators p and q, and must not pass a null pointer to memcmp.

This is a rather brute force fix, but if no one has any better ideas
I'll commit this to the trunk and 4.9 and 5 branches tomorrow. (I
think it should go on the branches, because 4.9 is known to optimise
away null pointer checks after invalid calls to memcmp like this).

I am not adding tests for these as we have no way to use ubsan in the
testsuite at present. I plan to make that possible, and will go back
and add tests using -fsanitize=undefined for all the recent fixes I've
made for ubsan errors.

Tested powerpc64le-linux.
commit 323133a6f623e827c7e70fa6918c7149e8932443
Author: Jonathan Wakely jwak...@redhat.com
Date:   Thu May 28 20:23:22 2015 +0100

	PR libstdc++/66327
	* include/bits/stl_algobase.h (__equaltrue::equal): Do not call
	memcmp with null pointers.
	(__lexicographical_comparetrue::__lc): Do not call memcmp for empty
	ranges.

diff --git a/libstdc++-v3/include/bits/stl_algobase.h b/libstdc++-v3/include/bits/stl_algobase.h
index 409ef36..db065e2 100644
--- a/libstdc++-v3/include/bits/stl_algobase.h
+++ b/libstdc++-v3/include/bits/stl_algobase.h
@@ -812,6 +812,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 static bool
 equal(const _Tp* __first1, const _Tp* __last1, const _Tp* __first2)
 {
+	  if (__first1 == 0 || __first2 == 0)
+	return __first1 == __last1;
+
 	  return !__builtin_memcmp(__first1, __first2, sizeof(_Tp)
    * (__last1 - __first1));
 	}
@@ -917,9 +920,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  const size_t __len1 = __last1 - __first1;
 	  const size_t __len2 = __last2 - __first2;
-	  const int __result = __builtin_memcmp(__first1, __first2,
-		std::min(__len1, __len2));
-	  return __result != 0 ? __result  0 : __len1  __len2;
+	  if (__len1  __len2)
+	{
+	  if (int __result = __builtin_memcmp(__first1, __first2,
+		  std::min(__len1, __len2)))
+		{
+		  return __result  0;
+		}
+	}
+	  return __len1  __len2;
 	}
 };

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Trevor Saunders

On Thu, May 28, 2015 at 08:47:16PM +0200, Martin Liška wrote:
 On 05/28/2015 08:03 PM, Jakub Jelinek wrote:
 On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:
 But we've been trying to avoid this. And the jit might not be too happy 
 about it either.
 
 Yeah, we should certainly try to avoid them, especially if it would affect
 many variables having to be constructed.
 
  Jakub
 
 
 Ok, thus I will do it as before my modifications:
 
 static pool_allocator update_cost_record *update_cost_record_pool = NULL;
 
 /* Initiate update cost records.  */
 static void
 init_update_cost_records (void)
 {
  update_cost_record_pool = new pool_allocator update_cost_record
(update cost records, 100);
 }
 
 I'm going to migrate rest of patches that use the same construct.


Hrm, why not just change pool_allocator so it does the first allocation
on the first alloc and just initializes everything to null / 0?  Then
the ctor would be close to trivial.  Then if you really care about the
stuff gcc doesn't optimize away you could add a special class
static_pool_allocator (you might also need to hack in a way to get the
c++ fe to do constexpr / defaulted functions).

Trev

 
 Thanks,
 Martin

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam

On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

You are right, it doesn't work.  I have attached the patch with the
changes you mentioned.

Thanks
Sri


 --
 H.J.
* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with noplt attribute.
(attribute_spec ix86_attribute_): Define new attribute noplt.
* doc/extend.texi: Document new attribute noplt.
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.

Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 223720)
+++ config/i386/i386.c  (working copy)
@@ -25599,6 +25599,25 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx call
   return call;
 }
 
+/* Return true if the function being called was marked with attribute
+   noplt.  If this function is defined, this should return false.  */
+static bool
+avoid_plt_to_call (rtx call_op)
+{
+  if (GET_CODE (call_op) != SYMBOL_REF
+  || SYMBOL_REF_LOCAL_P (call_op))
+return false;
+
+  tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+  if (symbol_decl != NULL_TREE
+   TREE_CODE (symbol_decl) == FUNCTION_DECL
+   lookup_attribute (noplt, DECL_ATTRIBUTES (symbol_decl)))
+return true;
+
+  return false;
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
@@ -25611,7 +25630,13 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = %!jmp\t%P0;
+   {
+ if (!TARGET_MACHO  !TARGET_SEH  !TARGET_PECOFF
+  TARGET_64BIT  avoid_plt_to_call (call_op))
+   xasm = %!jmp\t*%p0@GOTPCREL(%%rip);
+ else
+   xasm = %!jmp\t%P0;
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25679,13 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
 }
 
   if (direct_p)
-xasm = %!call\t%P0;
+{
+  if (!TARGET_MACHO  !TARGET_SEH  !TARGET_PECOFF
+  TARGET_64BIT  avoid_plt_to_call (call_op))
+xasm = %!call\t*%p0@GOTPCREL(%%rip);
+  else
+xasm = %!call\t%P0;
+}
   else
 xasm = %!call\t%A0;
 
@@ -46628,6 +46659,9 @@ static const struct attribute_spec ix86_attribute_
 false },
   { callee_pop_aggregate_return, 1, 1, false, true, true,
 ix86_handle_callee_pop_aggregate_return, true },
+  /* Attribute to avoid calling function via PLT.  */
+  { noplt, 0, 0, true, false, false, ix86_handle_fndecl_attribute,
+false },
   /* End element.  */
   { NULL,0, 0, false, false, false, NULL, false }
 };
Index: doc/extend.texi
===
--- doc/extend.texi (revision 223720)
+++ doc/extend.texi (working copy)
@@ -4858,6 +4858,13 @@ On x86-32 targets, the @code{stdcall} attribute ca
 assume that the called function pops off the stack space used to
 pass arguments, unless it takes a variable number of arguments.
 
+@item noplt
+@cindex @code{noplt}

Re: fix pr65369.c testcase

2015-05-28 Thread DJ Delorie


Done.  Thanks!

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Jan Hubicka

 On 05/28/2015 02:53 PM, Aldy Hernandez wrote:
 On 05/27/2015 08:39 AM, Jason Merrill wrote:
 On 05/20/2015 11:50 AM, Aldy Hernandez wrote:
 
 +  /* Fill in the size of variable-length fields in late dwarf.  */
 +  if (TREE_ASM_WRITTEN (type)
 +   !early_dwarf_dumping)
 +{
 +  tree member;
 +  for (member = TYPE_FIELDS (type); member; member = DECL_CHAIN
 (member))
 +fill_variable_array_bounds (TREE_TYPE (member));
 +  return;
 +}
 
 Why is this happening in late dwarf?  I'm concerned that front-end
 information that is necessary to do this might be lost by that point.
 
 I thought only after the optimizations had run their course would we be
 guaranteed to have accurate bound information.  At least, that's what my
 experience showed.
 
 Hmm, I don't know why optimizations would change the
 representation of the array type.

I don't think we change representation ATM, but eventually we want to get into
the datastructure reordering business.  I suppose to get this debug output
friendly, we will need a way to update the existing dwarf DIE to whatever
changes we want.

As for optimization changing type representation, I suppose one case is when
function with varray type gets inlined and the array bound happens to be a
different expression afterwards.  We produce a new copy of the original type
with different bounds then.

Honza

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Aldy Hernandez


On 05/27/2015 08:39 AM, Jason Merrill wrote:

On 05/20/2015 11:50 AM, Aldy Hernandez wrote:

+ determine anscestry later.  */


ancestry


Fixed.




+static bool early_dwarf_dumping;


Sorry for the late bikeshedding, but dumping suddenly strikes me as
odd, since there is no output as with other dumping in the compiler. Can
we change that to generation or building?


Fixed.




+  /* Reuse DIE even with a differing context.
+
+ This happens when called through
+ dwarf2out_abstract_function for formal parameter
+ packs.  */
+  gcc_assert (parm_die-die_parent-die_tag
+  == DW_TAG_GNU_formal_parameter_pack);


Does this mean we're generating a new DW_TAG_GNU_formal_parameter_pack
in late debug even though we already generated one in early debug?  If
so, why?


I will be addressing this separately, as we're likely to iterate many 
times on this :).





-  /* It is possible to have both DECL_ABSTRACT_P and DECLARATION be
true if we
- started to generate the abstract instance of an inline, decided
to output
- its containing class, and proceeded to emit the declaration of
the inline
- from the member list for the class.  If so, DECLARATION takes
priority;
- we'll get back to the abstract instance when done with the
class.  */
-
-  /* The class-scope declaration DIE must be the primary DIE.  */
-  if (origin  declaration  class_or_namespace_scope_p (context_die))
-{
-  origin = NULL;
-  gcc_assert (!old_die);
-}


Can't this happen anymore?


As discussed on IRC, this is dead code in the presence of debug-early, 
and no longer applicable with the upcoming DECL_ABSTRACT* redesign.





+  if ((is_cu_die (old_die-die_parent)
+   /* FIXME: Jason doesn't like this condition, but it fixes
+  the inconsistency/ICE with the following Fortran test:
+
+ module some_m
+ contains
+logical function funky (FLAG)
+  funky = .true.
+   end function
+ end module
+
+  Another alternative is !is_cu_die (context_die).
+   */
+   || old_die-die_parent-die_tag == DW_TAG_module


I like it now.  :)
You can leave the rest of the comment.


Updated comment.




+  /* For non DECL_EXTERNALs, if range information is available, fill
+ the DIE with it.  */
   else if (!DECL_EXTERNAL (decl))
 {
   HOST_WIDE_INT cfa_fb_offset;
+
   struct function *fun = DECL_STRUCT_FUNCTION (decl);

-  if (!old_die || !get_AT (old_die, DW_AT_inline))
-equate_decl_number_to_die (decl, subr_die);
+  /* If we have no fun-fde, we have no range information.
+ Skip over and fill in range information in the second
+ dwarf pass.  */
+  if (!fun-fde)
+goto no_fde_continue;


How about controlling this block with !early_dwarf so you don't need to
deal with missing FDE?


Fixed.




   if (generic_decl_parm
lang_hooks.function_parameter_pack_p (generic_decl_parm))
-gen_formal_parameter_pack_die (generic_decl_parm,
-   parm, subr_die,
-   parm);
+{
+  if (early_dwarf_dumping)
+gen_formal_parameter_pack_die (generic_decl_parm,
+   parm, subr_die,
+   parm);
+  else if (parm)
+parm = DECL_CHAIN (parm);
+}


Let's try only setting generic_decl when early_dwarf.


Fixed.




+  /* Unless we have an existing non-declaration DIE, equate the new
+ DIE.  */
+  if (!old_die || is_declaration_die (old_die))
+equate_decl_number_to_die (decl, subr_die);

...

+  if (decl  (DECL_ABSTRACT_P (decl) || declaration || old_die == NULL
+   /* If we make it to a specialization, we have already
+  handled the declaration by virtue of early dwarf.
+  If so, make a new assocation if available, so late
+  dwarf can find it.  */
+   || (specialization_p  early_dwarf_dumping)))
 equate_decl_number_to_die (decl, var_die);


Why are the conditions so different?  Can we use the function condition
for variables, too?


Hmmm...because they were already that way when I arrived? :).

How about what I do in the attached patch, which also seems to do the 
trick?:


  if (decl  (DECL_ABSTRACT_P (decl)
   || !old_die || is_declaration_die (old_die)))


+  /* Do nothing.  This must have been early dumped and it
+ won't even need location information since it's a
+ DW_AT_inline function.  */
+  for (dw_die_ref c = context_die; c; c = c-die_parent)
+if (c-die_tag == DW_TAG_inlined_subroutine
+|| c-die_tag == DW_TAG_subprogram)
+  {
+gcc_assert (get_AT (c, DW_AT_inline));
+break;
+  }


Maybe wrap this in #ifdef ENABLE_CHECKING.


Done.




+  /* Do the new DIE dance.  */
+  stmt_die = new_die (DW_TAG_lexical_block, context_die, stmt);
+  BLOCK_DIE (stmt) = stmt_die;
+}
+}
+  else

Re: [patch] fix bootstrap on FreeBSD i386/arm

2015-05-28 Thread Eric Botcazou

 This patch restores bootstrap on i386-*-freebsd*.
 The build was failing after the introduction of -std=c++98
 configure/build flag. The -std=c++98 enables strict_ansi and on FreeBSD
 the libc function atoll is not defined for this.

Solaris (x86 and SPARC) is also broken in various ways: for example, the sun 
preprocessor macro is no more defined.  Why do we need strict ANSI exactly?

-- 
Eric Botcazou

[PATCH] Extend -fno-plt to normal non-PIC branches on x86

2015-05-28 Thread H.J. Lu

This patch extends -fno-plt to normal non-PIC calls on x86.  -fno-plt
works in 64-bit mode with the existing binutils.  For 32-bit, we need
the updated assembler and linker to support call/jmp *foo@GOT with
a new relocation different from R_386_GOT32 to indicate that this
relocation applies to indirect branches.  A configure time check is
added to verify that 32-bit assembler generates a known relocation
which is different from R_386_GOT32.  A new 32-bit relocation is needed
since call/jmp *foo@GOT requires a different relocation from R_386_GOT32
which is used together with a GOT register in call/jmp *foo@GOT(%reg).

OK for master?

Thanks.

H.J.
---
* configure.ac (HAVE_AS_INDIRECT_BRANCH_VIA_GOT): New.  Defined
if 32-bit assembler generates a known relocation which is
different from R_386_GOT32.
* config.in: Regenerated.
* configure: Likewise.
* config/i386/i386.c (ix86_output_call_insn):  Extend -fno-plt
to normal non-PIC branches.
---
 gcc/config.in  | 14 ++
 gcc/config/i386/i386.c | 42 --
 gcc/configure  | 47 ++-
 gcc/configure.ac   | 18 +-
 4 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index daaf906..0ee5c38 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -363,6 +363,12 @@
 #endif
 
 
+/* Define true if the assembler supports 'call *foo@GOT'. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+#endif
+
+
 /* Define if your assembler supports the Sun syntax for cmov. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_IX86_CMOV_SUN_SYNTAX
@@ -686,8 +692,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'basename', otherwise define to
-   0. */
+/* Define to 1 if you have the declaration of `basename(const char*)', and to
+   0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_BASENAME
 #endif
@@ -963,8 +969,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'strstr', otherwise define to 0.
-   */
+/* Define to 1 if you have the declaration of `strstr(const char*,const
+   char*)', and to 0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_STRSTR
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e77cd04..5ca19f2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25611,7 +25611,26 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = %!jmp\t%P0;
+   {
+ if (!flag_plt
+  !flag_pic
+  !TARGET_MACHO
+  !TARGET_SEH
+  !TARGET_PECOFF)
+   {
+ /* Avoid PLT.  */
+ if (TARGET_64BIT)
+   xasm = %!jmp\t*%p0@GOTPCREL(%%rip);
+ else
+#ifdef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+   xasm = %!jmp\t*%p0@GOT;
+#else
+   xasm = %!jmp\t%P0;
+#endif
+   }
+ else
+   xasm = %!jmp\t%P0;
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25673,26 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 }
 
   if (direct_p)
-xasm = %!call\t%P0;
+{
+  if (!flag_plt
+  !flag_pic
+  !TARGET_MACHO
+  !TARGET_SEH
+  !TARGET_PECOFF)
+   {
+ /* Avoid PLT.  */
+ if (TARGET_64BIT)
+   xasm = %!call\t*%p0@GOTPCREL(%%rip);
+ else
+#ifdef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+   xasm = %!call\t*%p0@GOT;
+#else
+   xasm = %!call\t%P0;
+#endif
+   }
+  else
+   xasm = %!call\t%P0;
+}
   else
 xasm = %!call\t%A0;
 
diff --git a/gcc/configure b/gcc/configure
index a9a76d6..4419035 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -25361,7 +25361,7 @@ $as_echo #define HAVE_AS_IX86_DIFF_SECT_DELTA 1 
confdefs.h
 
 fi
 
-# These two are used unconditionally by i386.[ch]; it is to be defined
+# These three are used unconditionally by i386.[ch]; it is to be defined
 # to 1 if the feature is present, 0 otherwise.
 as_ix86_gotoff_in_data_opt=
 if test x$gas = xyes; then
@@ -25407,6 +25407,51 @@ cat confdefs.h _ACEOF
 _ACEOF
 
 
+as_ix86_indirect_branch_via_got_opt=
+if test x$gas = xyes; then
+  as_ix86_indirect_branch_via_got_opt=--32
+fi
+{ $as_echo $as_me:${as_lineno-$LINENO}: checking assembler for call 
*foo@GOT 5
+$as_echo_n checking assembler for call *foo@GOT...  6; }
+if test ${gcc_cv_as_ix86_indirect_branch_via_got+set} = set; then :
+  $as_echo_n (cached)  6
+else
+  gcc_cv_as_ix86_indirect_branch_via_got=no
+if test $in_tree_gas = yes; then
+if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 26 \) \* 1000 + 0`
+  then gcc_cv_as_ix86_indirect_branch_via_got=yes
+fi
+  elif test x$gcc_cv_as != x; then
+$as_echo '

Re: [PATCH 3/4, libitm, sh]: Change gtm_futex_{wait,wake} to int

2015-05-28 Thread Uros Bizjak

On Thu, May 28, 2015 at 12:52 AM, Kaz Kojima kkoj...@rr.iij4u.or.jp wrote:

 * config/linux/sh/futex_bits.h (sys_futex0) Change operands
 op and val to int.

 Untested.

 OK for mainline?

 OK.

 Although it looks obvious, I've confirmed that there is no build
 issue/regression on sh4-unknown-linux-gnu with it and the patch 1/4.

Thanks! I went ahead and committed the whole patch series to mainline SVN.

Uros.

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Richard Biener

On May 28, 2015 7:06:36 PM GMT+02:00, Jeff Law l...@redhat.com wrote:
On 05/28/2015 04:42 AM, David Malcolm wrote:

 Am I right in thinking that this is a statically-allocated object
with a
 non-trivial constructor?  i.e. that this constructor has to run
before
 main is entered?

 Do our coding guidelines allow for this?  (I've been burned by this
 before, on a buggy C++ runtime that didn't manage to support these).
 I'm a little nervous about this, touching global state before
 main (e.g. from the point-of-view of the JIT), though I don't know
yet
 if this is just a gut reaction, or if there's a valid concern here
(I'm
 officially on holiday this week, so I haven't had a chance to dig
deeply
 into these patches yet, sorry).
That idiom is used in various places by Martin's patches.   I didn't
see 
a strong rhyme or reason behind why it was used over allocating 
something in automatic or heap storage.

As to supporting it, I'm not terribly concerned about other buggy C++ 
runtimes.  GCC bootstraps with GCC, which means we've got our C++ 
runtime.  The only worry becomes the low level bits that we build our 
static ctor/dtor support on top of -- and I haven't seen major problems

with that for eons.

But we've been trying to avoid this. And the jit might not be too happy about 
it either.

jeff

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Jakub Jelinek

On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:
 But we've been trying to avoid this. And the jit might not be too happy about 
 it either.

Yeah, we should certainly try to avoid them, especially if it would affect
many variables having to be constructed.

Jakub

Re: [PATCH][ARM/AArch64 Testsuite] Cleanup advsimd-intrinsics.exp, removing unnecessary loop

2015-05-28 Thread Christophe Lyon

On 28 May 2015 at 18:45, Alan Lawrence alan.lawre...@arm.com wrote:
 I've tested this on aarch64, aarch64_be, and arm, and in all cases, the same
 tests are executed (whether running the whole advsimd-intrinsics.exp, or
 manually specifying a single file). AFAICT the loop, explicit
 runtest_file_p, and gcc_set_parallelization_enable, all stem from a point
 where we were calling c-torture-execute instead of or as well as
 gcc-dg-runtest. Now we have only the latter, it is quite capable of looping
 through tests itself (and correctly parallelizing them).

 Ok for trunk?

This looks OK, but why can't you also drop the other torture-related
lines as you did in your previous patch?
I mean:
load_lib c-torture.exp
load_lib torture-options.exp
etc...

Christophe.


 Cheers, Alan


 diff --git
 a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrins
 index 583832a..19a982d 100644
 ---
 a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
 +++
 b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
 @@ -55,20 +55,8 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
 $LTO_TORTURE_OPTI
  set additional_flags [add_options_for_arm_neon ]

  # Main loop.
 -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
 -# If we're only testing specific files and this isn't one of them, skip
 it.
 -if ![runtest_file_p $runtests $src] then {
 -   continue
 -}
 -
 -# runtest_file_p is already run above, and the code below can run
 -# runtest_file_p again, make sure everything for this test is
 -# performed if the above runtest_file_p decided this runtest
 -# instance should execute the test
 -gcc_parallel_test_enable 0
 -gcc-dg-runtest $src  $additional_flags
 -gcc_parallel_test_enable 1
 -}
 +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
 +   ${additional_flags}

  # All done.
  set dg-do-what-default ${save-dg-do-what-default}

Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Jeff Law


On 05/28/2015 06:29 AM, Martin Liška wrote:





Hello.

Thank you for pointing out the missing copyright.
The following patch adds it.

Ready for trunk?

Yes.
jeff

Re: [Patch, fortran, PR44672, v6] [F08] ALLOCATE with SOURCE and no array-spec

2015-05-28 Thread Mikael Morin

Le 28/05/2015 17:29, Andre Vehreschild a écrit :
 *** resolve_allocate_expr (gfc_expr *e, gfc_
 *** 7103,7112 
 --- 7103,7123 
 if (!ref2 || ref2-type != REF_ARRAY || ref2-u.ar.type == AR_FULL
 || (dimension  ref2-u.ar.dimen == 0))
   {
 +   /* F08:C633.  */
 +   if (code-expr3)
 + {
 +   if (!gfc_notify_std (GFC_STD_F2008, Array specification required 
 +in ALLOCATE statement at %L, e-where))
 + goto failure;
 +   *array_alloc_wo_spec = true;
 + }
 +   else
 + {
 gfc_error (Array specification required in ALLOCATE statement 
at %L, e-where);
 goto failure;
   }
 + }
   
 /* Make sure that the array section reference makes sense in the
context of an ALLOCATE specification.  */
I think we can be a little bit more user-friendly with the gfc_notify_std
error message.
Something like:
ALLOCATE without array spec at %L
ALLOCATE with array bounds determined from SOURCE or MOLD at %L

 *** gfc_array_init_size (tree descriptor, in
 *** 5044,5053 
lower == NULL= lbound = 1, ubound = upper[n]
upper[n] = NULL  = lbound = 1, ubound = lower[n]
upper[n] != NULL = lbound = lower[n], ubound = upper[n]  */
 -   ubound = upper[n];
   
 /* Set lower bound.  */
 gfc_init_se (se, NULL);
 if (lower == NULL)
   se.expr = gfc_index_one_node;
 else
 --- 5050,5063 
lower == NULL= lbound = 1, ubound = upper[n]
upper[n] = NULL  = lbound = 1, ubound = lower[n]
upper[n] != NULL = lbound = lower[n], ubound = upper[n]  */
   
 /* Set lower bound.  */
 gfc_init_se (se, NULL);
 +   if (expr3_desc != NULL_TREE)
 + se.expr = gfc_index_one_node;
 +   else
 + {
 +   ubound = upper[n];
 if (lower == NULL)
   se.expr = gfc_index_one_node;
 else
 *** gfc_array_init_size (tree descriptor, in
 *** 5064,5069 
 --- 5074,5080 
 ubound = lower[n];
   }
   }
 + }
 gfc_conv_descriptor_lbound_set (descriptor_block, descriptor,
 gfc_rank_cst[n], se.expr);
 conv_lbound = se.expr;
You can avoid reindenting if the ubound = upper[n] statement is kept at
its original place.

 *** gfc_array_init_size (tree descriptor, in
 *** 5076,5085 
   
 /* Set upper bound.  */
 gfc_init_se (se, NULL);
 gcc_assert (ubound);
 gfc_conv_expr_type (se, ubound, gfc_array_index_type);
 gfc_add_block_to_block (pblock, se.pre);
 ! 
 gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
 gfc_rank_cst[n], se.expr);
 conv_ubound = se.expr;
 --- 5087,5111 
   
 /* Set upper bound.  */
 gfc_init_se (se, NULL);
 +   if (expr3_desc != NULL_TREE)
 + {
 +   /* Set the upper bound to be (desc.ubound - desc.lbound)+ 1.  */
 +   tmp = fold_build2_loc (input_location, MINUS_EXPR,
 +  gfc_array_index_type,
 +  gfc_conv_descriptor_ubound_get (
 +expr3_desc, gfc_rank_cst[n]),
 +  gfc_conv_descriptor_lbound_get (
 +expr3_desc, gfc_rank_cst[n]));
 +   se.expr = fold_build2_loc (input_location, PLUS_EXPR,
 +  gfc_array_index_type, tmp,
 +  gfc_index_one_node);
 + }
 +   else
 + {
 gcc_assert (ubound);
 gfc_conv_expr_type (se, ubound, gfc_array_index_type);
 gfc_add_block_to_block (pblock, se.pre);
 ! }
 gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
 gfc_rank_cst[n], se.expr);
 conv_ubound = se.expr;
Your one-based-ness problem was here, wasn't it?
I would rather copy directly lbound and ubound from expr3_desc to
descriptor.
If the source has non-one-based bounds, the above would produce wrong
bounds.

 *** gfc_trans_allocate (gfc_code * code)
 *** 5174,5185 
   {
 if (!code-expr3-mold
 || code-expr3-ts.type == BT_CHARACTER
 !   || vtab_needed)
   {
 /* Convert expr3 to a tree.  */
 gfc_init_se (se, NULL);
 !   /* For all simple expression just get the descriptor or the
 !  reference, respectively, depending on the rank of the expr.  */
 if (code-expr3-rank != 0)
   gfc_conv_expr_descriptor (se, code-expr3);
 else
 --- 5175,5195 
   {
 if (!code-expr3-mold
 || code-expr3-ts.type == BT_CHARACTER
 !   || vtab_needed
 !   || code-ext.alloc.arr_spec_from_expr3)
   {
 /* Convert expr3 to a tree.  */

Re: [PATCH v3] libiberty: cleanup Makefile.in

2015-05-28 Thread Bernhard Reutner-Fischer

On 28 May 2015 at 17:48, Ian Lance Taylor i...@google.com wrote:
 On Thu, May 28, 2015 at 1:57 AM, Bernhard Reutner-Fischer
 rep.dot@gmail.com wrote:

 +stamp-pic-ofiles: $(CFILES:%=$(srcdir)/%)

 To the best of my knowledge, in POSIX make variable substitutions of
 this form do not recognize % specially.  In POSIX make this kind of
 substitution can only be used to change the file extension.  Also,
 POSIX make does not permit a variable expansion in the right hand side
 of the substitution.

Pity. bmake (a port from some BSD make) does support it.

Either way, I'm withdrawing these patches and the idea to attempt to make
the pic/ and noasan/ handling prettier.

thanks anyway!

cheers,

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-05-28 Thread Jeff Law


On 05/28/2015 06:49 AM, Martin Liška wrote:
.


This mechanism has just been adapted. I find it quite useful, as we have
examples in the source code where we
allocate the same struct/class types from various pools. For debugging
purposes, it helps to identify whether
the release operation is called for the correct pool.
I saw that you were following existing practice for the pools in the 
removal patch. I still don't like it as it makes mixing and matching 
objects harder when debugging gcc and if the structure is exposed for 
plugins, then we've got an unnecessary ABI plugin breakage.


I certainly understand how it's useful -- I'm not questioning that.  I'm 
questioning changing the size of structures on ENABLE_CHECKING.


My first inclination would be to include all that stuff unconditionally. 
 If that's too much overhead, then perhaps include the structure 
member, but not bother with any of the bookkeeping except for 
ENABLE_CHECKING.




Anyway, I would like to commit all these patches at once (one by one).
Thus, I'm going to wait for approval for the whole series before I'll
commit the set.
Quite reasonable -- I was mostly trying to make sure I understood the 
testing situation.


I think at this point the whole series is approved, so you can move forward.

jeff

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Martin Liška


On 05/28/2015 08:03 PM, Jakub Jelinek wrote:

On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:

But we've been trying to avoid this. And the jit might not be too happy about 
it either.


Yeah, we should certainly try to avoid them, especially if it would affect
many variables having to be constructed.

Jakub



Ok, thus I will do it as before my modifications:

static pool_allocator<update_cost_record> *update_cost_record_pool = NULL;

/* Initiate update cost records.  */
static void
init_update_cost_records (void)
{
 update_cost_record_pool = new pool_allocator<update_cost_record>
   ("update cost records", 100);
}

I'm going to migrate the rest of the patches that use the same construct.

Thanks,
Martin

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Aldy Hernandez


On 05/27/2015 08:39 AM, Jason Merrill wrote:

On 05/20/2015 11:50 AM, Aldy Hernandez wrote:



+  /* Fill in the size of variable-length fields in late dwarf.  */
+  if (TREE_ASM_WRITTEN (type)
+   !early_dwarf_dumping)
+{
+  tree member;
+  for (member = TYPE_FIELDS (type); member; member = DECL_CHAIN
(member))
+fill_variable_array_bounds (TREE_TYPE (member));
+  return;
+}


Why is this happening in late dwarf?  I'm concerned that front-end
information that is necessary to do this might be lost by that point.


I thought only after the optimizations had run their course would we be 
guaranteed to have accurate bound information.  At least, that's what my 
experience showed.


Do you have something else in mind?




+  /* Variable-length types may be incomplete even if
+ TREE_ASM_WRITTEN.  For such types, fall through to
+ gen_array_type_die() and possibly fill in
+ DW_AT_{upper,lower}_bound attributes.  */
+  if ((TREE_CODE (type) != ARRAY_TYPE
+TREE_CODE (type) != RECORD_TYPE
+TREE_CODE (type) != UNION_TYPE
+TREE_CODE (type) != QUAL_UNION_TYPE)
+  || (TYPE_SIZE (type)
+   TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST))


Similarly, why check for INTEGER_CST here?


The INTEGER_CST check was supposed to mean we have bound information 
already, no need to look further.


I guess we could have a variable length bound that does not decay to a 
constant.  Perhaps I could check the presence of a cached DIE with a 
type DIE containing a DW_TAG_subrange_type *and* 
DW_AT_{lower,upper}_bound ??.  Basically I just want to add bound 
information, if available and not already present.


Suggestions?

Aldy

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Jeff Law


On 05/28/2015 04:42 AM, David Malcolm wrote:


Am I right in thinking that this is a statically-allocated object with a
non-trivial constructor?  i.e. that this constructor has to run before
main is entered?

Do our coding guidelines allow for this?  (I've been burned by this
before, on a buggy C++ runtime that didn't manage to support these).
I'm a little nervous about this, touching global state before
main (e.g. from the point-of-view of the JIT), though I don't know yet
if this is just a gut reaction, or if there's a valid concern here (I'm
officially on holiday this week, so I haven't had a chance to dig deeply
into these patches yet, sorry).
That idiom is used in various places by Martin's patches.   I didn't see 
a strong rhyme or reason behind why it was used over allocating 
something in automatic or heap storage.


As to supporting it, I'm not terribly concerned about other buggy C++ 
runtimes.  GCC bootstraps with GCC, which means we've got our C++ 
runtime.  The only worry becomes the low level bits that we build our 
static ctor/dtor support on top of -- and I haven't seen major problems 
with that for eons.


jeff

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com wrote:
 I have attached a patch that adds the new attribute noplt.  Please review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


2 comments:

1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
2. Don't you need to check

   !TARGET_MACHO
   !TARGET_SEH
   !TARGET_PECOFF

since it only works for ELF.

-- 
H.J.

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam

I have attached a patch that adds the new attribute noplt.  Please review.

* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with noplt attribute.
(attribute_spec ix86_attribute_): Define new attribute noplt.
* doc/extend.texi: Document new attribute noplt.
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.



Thanks
Sri

On Fri, May 22, 2015 at 2:00 AM, Pedro Alves pal...@redhat.com wrote:
 On 05/21/2015 11:02 PM, Sriraman Tallam wrote:
 On Thu, May 21, 2015 at 2:51 PM, Pedro Alves pal...@redhat.com wrote:
 On 05/21/2015 10:12 PM, Sriraman Tallam wrote:

 My original proposal, for x86_64 only, was to add
 -fno-plt=function-name. This lets the user decide for which
 functions PLT must be avoided.  Let the compiler always generate an
 indirect call using call *func@GOTPCREL(%rip).  We could do this for
 non-PIC code too.  No need for linker fixups since this relies on the
 user to know that func is from a shared object.

 Having to pass function names on the command line seems like an odd
 interface.  E.g, you'll need to pass the mangled name for
 C++ functions.  Any reason this isn't a function attribute?

 It is not clear to me where I would stick the attribute.  Example
 usage in foo.cc:

 #includestring.h

 int main() {
   int n = memcmp();
 }

 I want memcmp to not go through PLT, do you propose explicitly
 re-declaring it in foo.cc with the attribute?

 I guess you'd do:

 #includestring.h

 __attribute__((no_plt)) typeof (memcpy) memcpy;

 int main() {
   int n = memcmp();
 }

 or even:

 #includestring.h

 int main() {
   if (hotpath) {
 __attribute__((no_plt)) typeof (memcpy) memcpy;
 for (..) {
   int n = memcmp();
 }
   } else {
   int n = memcmp();
   }
 }

 or globally:

 $ cat no-plt/string.h:
 #include_next string.h
 __attribute__((no_plt)) typeof (memcpy) memcpy;

 $ gcc -I no-plt/ ...

 Thanks,
 Pedro Alves

* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with noplt attribute.
(attribute_spec ix86_attribute_): Define new attribute noplt.
* doc/extend.texi: Document new attribute noplt.
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.

Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 223720)
+++ config/i386/i386.c  (working copy)
@@ -25599,6 +25599,25 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx call
   return call;
 }
 
+/* Return true if the function being called was marked with attribute
+   noplt.  If this function is defined, this should return false.  */
+static bool
+avoid_plt_to_call (rtx call_op)
+{
+  if (GET_CODE (call_op) != SYMBOL_REF
+  || SYMBOL_REF_LOCAL_P (call_op))
+return false;
+
+  tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+  if (symbol_decl != NULL_TREE
+   TREE_CODE (symbol_decl) == FUNCTION_DECL
+   lookup_attribute (noplt, DECL_ATTRIBUTES (symbol_decl)))
+return true;
+
+  return false;
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
@@ -25611,7 +25630,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = %!jmp\t%P0;
+   {
+ if (TARGET_64BIT  avoid_plt_to_call (call_op))
+   xasm = jmp\t*%p0@GOTPCREL(%%rip);
+ else
+   xasm = jmp\t%P0;
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25678,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
 }
 
   if (direct_p)
-xasm = %!call\t%P0;
+{
+  if (TARGET_64BIT  avoid_plt_to_call (call_op))
+xasm = call\t*%p0@GOTPCREL(%%rip);
+  else
+xasm = call\t%P0;
+}
   else
 xasm = %!call\t%A0;
 
@@ -46628,6 +46657,9 @@ static const struct attribute_spec ix86_attribute_
 false },
   { callee_pop_aggregate_return, 1, 1, false, true, true,
 ix86_handle_callee_pop_aggregate_return, true },
+  /* Attribute to avoid calling function via PLT.  */
+  { noplt, 0, 0, true, false, false, ix86_handle_fndecl_attribute,
+false },
   /* End element.  */
   { NULL,0, 0, false, false, false, NULL, false }
 };
Index: doc/extend.texi
===
--- doc/extend.texi (revision 223720)
+++ doc/extend.texi (working copy)
@@ -4858,6 +4858,13 @@ On x86-32 targets, the @code{stdcall} attribute ca
 assume that the called function pops off the stack space used to
 pass arguments, unless it takes a variable number of arguments.
 
+@item noplt
+@cindex @code{noplt} function attribute, x86-64
+@cindex functions whose calls do not go via PLT
+On x86-64 targets, the @code{noplt}

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam

On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com wrote:
 I have attached a patch that adds the new attribute noplt.  Please review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

Ok, I will make this change. OTOH, is it just better to piggy-back on
existing -fno-plt change by Alex in calls.c
and do this:

Index: calls.c
===
--- calls.c (revision 223720)
+++ calls.c (working copy)
@@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
 targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
   ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
   : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic  !flag_plt  fndecl_or_type
+  else if (fndecl_or_type
 TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-!targetm.binds_local_p (fndecl_or_type))
+!targetm.binds_local_p (fndecl_or_type)
+((flag_pic  !flag_plt)
+   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
 {
   funexp = force_reg (Pmode, funexp);
 }


Thanks
Sri


 --
 H.J.

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


Does it work on non-PIC calls?

-- 
H.J.

Re: [PATCH] Optimize (CST1 << A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Marc Glisse


On Thu, 28 May 2015, Marek Polacek wrote:


This PR points out that we weren't able to optimize 1 << x == 2 to just
x == 1.


Side note: if we are looking for extra patterns to simplify, llvm has an 
almost unlimited supply. Here are a few we don't seem to have (there are 
more where those came from), of course several need constraining / 
generalizing, it is just a list of hints I wrote for myself.


(A|B) & ~(A&B) -> A^B
(A | B) & ((~A) ^ B) -> (A & B)
(A & (~B)) | (A ^ B) -> (A ^ B)
((B | C) & A) | B -> B | (A & C)
A | ( A ^ B) -> A |  B
A | (~A ^ B) -> A | ~B
(A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
(A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
(A & B) | (A ^ B) -> (A | B)
A | ~(A ^ B) -> A | ~B
(A & B) | ((~A) ^ B) -> (~A ^ B)
~(~X & Y) -> (X | ~Y)
~(~X >>s Y) -> (X >>s Y)
(A & B)^(A | B) -> A ^ B
(A | ~B) ^ (~A | B) -> A ^ B
(A & ~B) ^ (~A & B) -> A ^ B
(A ^ C)^(A | B) -> ((~A) & B) ^ C
(A & B) ^ (A ^ B) -> (A | B)
(A & ~B) ^ (~A) -> ~(A & B)
(A&B)+(A^B) -> A|B
(A&B)+(A|B) -> A+B
(A|B)-(A^B) -> A&B
((X | Y) - X) -> (~X & Y)
fmax(x,NaN) -> x
fmax(a,fmax(a,b)) -> fmax(a,b)
(X+2) <u X -> x >u 256-2
(1 << X) < 30 -> X <= 4
((X & ~7) == 0) -> X < 8
2 * X < 5 -> X <= 2
((1 << x)&8) == 0 -> x != 3
((1 << x)&7) == 0 -> x > 2
Y - Z < X - Z -> Y < X
3 * X == 3 * Y -> X == Y
A >> 3 == B >> 3 -> (A ^ B) < 8
(float)int <= 4.4 -> int <= 4
x unle x -> x ord x



+/* (CST1 << A) == CST2 -> A == log2 (CST2 / CST1)
+   (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)
+   if CST2 is a multiple of CST1.  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
+  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))
+wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))


Doesn't type refer to the result of the EQ_EXPR here?


On Thu, 28 May 2015, Jakub Jelinek wrote:


Is CST2 a multiple of CST1 the best test though?
I mean say in
(0x8001U << x) == 0x20000U
0x20000U isn't a multiple of 0x8001U, yet there is only one
valid value of x for which it holds (17), so we could very well
optimize that to x == 17.
If popcount of the CST1 is 1, then multiple_of_p is supposedly sufficient
(have you checked if CST1 is negative that it still works?), for others
supposedly we could have a helper function that would just try
in a loop all shift counts from 0 to precision - 1, and note when
(CST1 << b) == CST2 - if for no b, then it should fold regardless of
has_single_use to false or true, if for exactly one shift count, then
use a comparison against that shift count, otherwise give up?


ctz(CST2)-ctz(CST1) should provide a single candidate without looping. 
ctz(CST1) is also relevant when CST2==0.


--
Marc Glisse

[patch] fix bootstrap on FreeBSD i386/arm

2015-05-28 Thread Andreas Tobler


All,

This patch restores bootstrap on i386-*-freebsd*.
The build was failing after the introduction of -std=c++98 
configure/build flag. The -std=c++98 enables strict_ansi and on FreeBSD 
the libc function atoll is not defined for this.


But the configure always stated atoll available.

A bit of debugging showed that, to my understanding now, AC_CHECK_FUNCS only 
checks if a func is available, not more. gcc_AC_CHECK_DECLS, on the other hand, 
really recognises the build flags and tells me that atoll w/o std=c++98 
is available and with std=c++98 it is not available.


So, the below patch addresses this and restores bootstrap on FreeBSD.

Ok for trunk?

Thanks,
Andreas

2015-05-28  Andreas Tobler  andre...@gcc.gnu.org

* configure.ac: Move the atoll check from AC_CHECK_FUNCS to
gcc_AC_CHECK_DECLS.
* configure: Regenerate.

Index: configure.ac
===
--- configure.ac(revision 223845)
+++ configure.ac(working copy)
@@ -1149,7 +1149,7 @@
   fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked dnl
   fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked dnl
   putchar_unlocked putc_unlocked)
-AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoll atoq \
+AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoq \
popen sysconf strsignal getrusage nl_langinfo \
gettimeofday mbstowcs wcswidth mmap setlocale \
gcc_UNLOCKED_FUNCS madvise)
@@ -1213,7 +1213,7 @@
 #include ansidecl.h
 #include system.h])
 
-gcc_AC_CHECK_DECLS(getenv atol asprintf sbrk abort atof getcwd getwd \
+gcc_AC_CHECK_DECLS(getenv atol atoll asprintf sbrk abort atof getcwd getwd \
madvise stpcpy strnlen strsignal strverscmp \
strtol strtoul strtoll strtoull \
errno snprintf vsnprintf vasprintf malloc realloc calloc \

Re: [CHKP, PATCH] Fix instrumented indirect calls with propagated pointers

2015-05-28 Thread Jan Hubicka

  2015-04-14  Ilya Enkovich  ilya.enkov...@intel.com
 
  PR target/65527
  * cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Add
  redirection for instrumented calls.
  * lto-wrapper.c (merge_and_complain): Merge 
  -fcheck-pointer-bounds.
  (append_compiler_options): Append -fcheck-pointer-bounds.
  * tree-chkp.h (chkp_copy_call_skip_bounds): New.
  (chkp_redirect_edge): New.
  * tree-chkp.c (chkp_copy_call_skip_bounds): New.
  (chkp_redirect_edge): New.
 
  gcc/testsuite/
 
  2015-04-14  Ilya Enkovich  ilya.enkov...@intel.com
 
  PR target/65527
  * gcc.target/i386/mpx/chkp-fix-calls-1.c: New.
  * gcc.target/i386/mpx/chkp-fix-calls-2.c: New.
  * gcc.target/i386/mpx/chkp-fix-calls-3.c: New.
  * gcc.target/i386/mpx/chkp-fix-calls-4.c: New.

OK.

Honza

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam

On Thu, May 28, 2015 at 2:52 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 2:27 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 2:01 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 1:54 PM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic  !flag_plt  fndecl_or_type
 +  else if (fndecl_or_type
  TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -!targetm.binds_local_p (fndecl_or_type))
 +!targetm.binds_local_p (fndecl_or_type)
 +((flag_pic  !flag_plt)
 +   || (lookup_attribute (noplt, 
 DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

 You are right, it doesn't work.  I have attached the patch with the
 changes you mentioned.


 Since direct_p is true, do we need

 +  if (GET_CODE (call_op) != SYMBOL_REF
 +  || SYMBOL_REF_LOCAL_P (call_op))
 +return false;

 We do need it right because  for this case below, I do not want an
 indirect call:

 __attribute__((noplt))
 int foo() {
   return 0;
 }

 int main()
 {
   return foo();
 }

 Assuming foo is not inlined, if I remove the lines you mentioned, I
 will get an indirect call which is unnecessary.


 I meant the GET_CODE (call_op) != SYMBOL_REF part isn't
 needed.

I should have realized that :), sorry.  Patch fixed.

Thanks
Sri




 --
 H.J.
* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with noplt attribute.
(attribute_spec ix86_attribute_): Define new attribute noplt.
* doc/extend.texi: Document new attribute noplt.
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.

Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 223720)
+++ config/i386/i386.c  (working copy)
@@ -25599,6 +25599,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx call
   return call;
 }
 
+/* Return true if the function being called was marked with attribute
+   noplt.  If this function is defined, this should return false.  */
+static bool
+avoid_plt_to_call (rtx call_op)
+{
+  if (SYMBOL_REF_LOCAL_P (call_op))
+return false;
+
+  tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+  if (symbol_decl != NULL_TREE
+   TREE_CODE (symbol_decl) == FUNCTION_DECL
+   lookup_attribute (noplt, DECL_ATTRIBUTES (symbol_decl)))
+return true;
+
+  return false;
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
@@ -25611,7 +25629,13 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = %!jmp\t%P0;
+   {
+ if (!TARGET_MACHO  !TARGET_SEH  !TARGET_PECOFF
+  TARGET_64BIT  avoid_plt_to_call (call_op))
+   xasm = %!jmp\t*%p0@GOTPCREL(%%rip);
+ else
+   xasm = %!jmp\t%P0;
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25678,13 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
 }
 
   if (direct_p)
-xasm = %!call\t%P0;
+{
+  if (!TARGET_MACHO  !TARGET_SEH  !TARGET_PECOFF
+  TARGET_64BIT  avoid_plt_to_call (call_op))
+xasm = %!call\t*%p0@GOTPCREL(%%rip);
+  else
+xasm = %!call\t%P0;
+}
   else
 xasm = %!call\t%A0;
 
@@ -46628,6 +46658,9

[expmed] Avoid clobbering a yet-to-be-used base/index register.

2015-05-28 Thread DJ Delorie


20040625-1 fails on targets with pointers bigger than WORD_SIZE (rl78,
msp430/-mlarge) because the base register is clobbered, partially
rebuilt with the new value, then used as a base for the second part of
the calculation.  Ok?

* expmed.c (extract_bit_field_1): Avoid clobbering a
yet-to-be-used base/index register.

 
Index: expmed.c
===
--- expmed.c(revision 223850)
+++ expmed.c(working copy)
@@ -1613,12 +1613,17 @@ extract_bit_field_1 (rtx str_rtx, unsign
   unsigned int i;
   rtx_insn *last;
 
   if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
target = gen_reg_rtx (mode);
 
+  /* In case we're about to clobber a base register or something 
+(see gcc.c-torture/execute/20040625-1.c).   */
+  if (reg_mentioned_p (target, str_rtx))
+   target = gen_reg_rtx (mode);
+
   /* Indicate for flow that the entire target reg is being set.  */
   emit_clobber (target);
 
   last = get_last_insn ();
   for (i = 0; i < nwords; i++)
{

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu

On Thu, May 28, 2015 at 4:54 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 2:52 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 2:27 PM, Sriraman Tallam tmsri...@google.com wrote:
 On Thu, May 28, 2015 at 2:01 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 1:54 PM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 12:05 PM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam tmsri...@google.com 
 wrote:
 On Thu, May 28, 2015 at 11:42 AM, H.J. Lu hjl.to...@gmail.com wrote:
 On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam 
 tmsri...@google.com wrote:
 I have attached a patch that adds the new attribute noplt.  Please 
 review.

 * config/i386/i386.c (avoid_plt_to_call): New function.
 (ix86_output_call_insn): Generate indirect call for functions
 marked with noplt attribute.
 (attribute_spec ix86_attribute_): Define new attribute noplt.
 * doc/extend.texi: Document new attribute noplt.
 * gcc.target/i386/noplt-1.c: New testcase.
 * gcc.target/i386/noplt-2.c: New testcase.


 2 comments:

 1. Don't remove %! prefix before call/jmp.  It is needed for MPX.
 2. Don't you need to check

!TARGET_MACHO
!TARGET_SEH
!TARGET_PECOFF

 since it only works for ELF.

 Ok, I will make this change. OTOH, is it just better to piggy-back on
 existing -fno-plt change by Alex in calls.c
 and do this:

 Index: calls.c
 ===
 --- calls.c (revision 223720)
 +++ calls.c (working copy)
 @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
  targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
 -  else if (flag_pic && !flag_plt && fndecl_or_type
 +  else if (fndecl_or_type
  && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
 -&& !targetm.binds_local_p (fndecl_or_type))
 +&& !targetm.binds_local_p (fndecl_or_type)
 +&& ((flag_pic && !flag_plt)
 +   || (lookup_attribute ("noplt", 
 DECL_ATTRIBUTES(fndecl_or_type)
  {
funexp = force_reg (Pmode, funexp);
  }


 Does it work on non-PIC calls?

 You are right, it doesn't work.  I have attached the patch with the
 changes you mentioned.


 Since direct_p is true, do we need

 +  if (GET_CODE (call_op) != SYMBOL_REF
 +  || SYMBOL_REF_LOCAL_P (call_op))
 +return false;

 We do need it right because  for this case below, I do not want an
 indirect call:

 __attribute__((noplt))
 int foo() {
   return 0;
 }

 int main()
 {
   return foo();
 }

 Assuming foo is not inlined, if I remove the lines you mentioned, I
 will get an indirect call which is unnecessary.


 I meant the GET_CODE (call_op) != SYMBOL_REF part isn't
 needed.

 I should have realized that :), sorry.  Patch fixed.


--- testsuite/gcc.target/i386/noplt-1.c (revision 0)
+++ testsuite/gcc.target/i386/noplt-1.c (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target x86_64-*-* } } */
...
+/* { dg-final { scan-assembler "call\[
\t\]\\*.*foo.*@GOTPCREL\\(%rip\\)" } } */

The test will fail on Windows and Darwin.


-- 
H.J.

Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Jason Merrill


Looks good.

Jason

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Trevor Saunders

On Thu, May 28, 2015 at 06:42:57AM -0400, David Malcolm wrote:
 On Wed, 2015-05-27 at 15:56 +0200, mliska wrote:
  gcc/ChangeLog:
  
  2015-04-30  Martin Liska  mli...@suse.cz
  
  * ira-color.c (init_update_cost_records): Use new type-based pool 
  allocator.
  (get_update_cost_record): Likewise.
  (free_update_cost_record_list): Likewise.
  (finish_update_cost_records): Likewise.
  (initiate_cost_update): Likewise.
  ---
   gcc/ira-color.c | 19 +--
   1 file changed, 5 insertions(+), 14 deletions(-)
  
  diff --git a/gcc/ira-color.c b/gcc/ira-color.c
  index 4750714..4aec98e 100644
  --- a/gcc/ira-color.c
  +++ b/gcc/ira-color.c
  @@ -1166,16 +1166,8 @@ setup_profitable_hard_regs (void)
  allocnos.  */
   
   /* Pool for update cost records.  */
  -static alloc_pool update_cost_record_pool;
  -
  -/* Initiate update cost records.  */
  -static void
  -init_update_cost_records (void)
  -{
  -  update_cost_record_pool
  -= create_alloc_pool ("update cost records",
  -sizeof (struct update_cost_record), 100);
  -}
  +static pool_allocator<update_cost_record> update_cost_record_pool
  +  ("update cost records", 100);
 
 Am I right in thinking that this is a statically-allocated object with a
 non-trivial constructor?  i.e. that this constructor has to run before
 main is entered?

yes though I think it'd be pretty easy to make it basically trivial but
with a static initializer because gcc doesn't optimize them well, and
with a bit more work we could probably get rid of the static initializer
without actually fixing gcc.

 Do our coding guidelines allow for this?  (I've been burned by this
 before, on a buggy C++ runtime that didn't manage to support these).

I'm pretty sure there already are some iirc the pretty printers are one
example.

 I'm a little nervous about this, touching global state before
 main (e.g. from the point-of-view of the JIT), though I don't know yet
 if this is just a gut reaction, or if there's a valid concern here (I'm

afaik it should work fine.  Of course this is global data which isn't
great, but that's a preexisting problem.

Trev

Re: [CHKP, PATCH] Fix LTO cgraph merge for instrumented functions

2015-05-28 Thread Jan Hubicka

 Ping

I am really sorry for ignoring this so long - I would like to reorg the code
and replace instrumentation thunks by the notion of transparent aliases,
but did not have time to do that yet.  Have quite busy time now.
 
  2015-04-14  Ilya Enkovich  ilya.enkov...@intel.com
 
  * ipa.c (symbol_table::remove_unreachable_nodes): Don't
  remove instrumentation thunks calling reachable functions.
  * lto-cgraph.c (output_refs): Always output IPA_REF_CHKP.
  * lto/lto-partition.c (privatize_symbol_name_1): New.
  (privatize_symbol_name): Privatize both decl and orig_decl
  names for instrumented functions.
 
  gcc/testsuite/
 
  2015-04-14  Ilya Enkovich  ilya.enkov...@intel.com
 
  * gcc.dg/lto/chkp-privatize-1_0.c: New.
  * gcc.dg/lto/chkp-privatize-1_1.c: New.
  * gcc.dg/lto/chkp-privatize-2_0.c: New.
  * gcc.dg/lto/chkp-privatize-2_1.c: New.
 
 
  diff --git a/gcc/ipa.c b/gcc/ipa.c
  index b3752de..3054afe 100644
  --- a/gcc/ipa.c
  +++ b/gcc/ipa.c
  @@ -492,7 +492,22 @@ symbol_table::remove_unreachable_nodes (FILE *file)
  }
else if (cnode->thunk.thunk_p)
  enqueue_node (cnode->callees->callee, first, reachable);
  -
  +
  + /* For instrumentation clones we always need original
  +function node for proper LTO privatization.  */
  + if (cnode->instrumentation_clone
  +  && reachable.contains (cnode)

reachable.contains (cnode) is !in_boundary_p.

  +  && cnode->definition)
  +   {
  + gcc_assert (cnode->instrumented_version || in_lto_p);
  + if (cnode->instrumented_version)
  +   {
  + enqueue_node (cnode->instrumented_version, first,
  +   reachable);
  + reachable.add (cnode->instrumented_version);

Why do you need the other tests?  Can we have an instrumentation node that is 
not a definition?
I suppose you can remove if (cnode->instrumented_version) because you assert it 
anyway.

  -  if (cnode)
  +  if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
   {
 tree iname = NULL_TREE;
 if (cnode->instrumentation_clone)
  -   iname = DECL_ASSEMBLER_NAME (cnode->decl);
  +   {
  + /* If we want to privatize instrumentation clone
  +then we also need to privatize original function.  */
  + if (cnode->instrumented_version)
  +   privatize_symbol_name (cnode->instrumented_version);
  + else
  +   privatize_symbol_name_1 (cnode, cnode->orig_decl);
  + iname = DECL_ASSEMBLER_NAME (cnode->decl);
  + TREE_CHAIN (iname) = DECL_ASSEMBLER_NAME (cnode->orig_decl);
  +   }
 else if (cnode->instrumented_version
  -   && cnode->instrumented_version->orig_decl == decl)
  -   iname = DECL_ASSEMBLER_NAME (cnode->instrumented_version->decl);
  -
  -  if (iname)
  +   && cnode->instrumented_version->orig_decl == cnode->decl)
  {
  - gcc_assert (IDENTIFIER_TRANSPARENT_ALIAS (iname));
  - TREE_CHAIN (iname) = DECL_ASSEMBLER_NAME (decl);
  + iname = DECL_ASSEMBLER_NAME (cnode->instrumented_version->decl);
  + TREE_CHAIN (iname) = DECL_ASSEMBLER_NAME (cnode->decl);
  }
   }
  -  if (symtab->dump_file)
  -fprintf (symtab->dump_file,
  -   "Privatizing symbol name: %s -> %s\n",
  -   name, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
  +

I think we ought to have verify_symtab_node checking for this.  All the 
handling of the
name links seems somewhat fragile (I am mostly concerned about getting it right 
for
LTO before renaming takes place)

OK with the changes above for mainline and for branch if it does not cause 
problems
for a week.

Honza
 return true;
   }
 
  diff --git a/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_0.c 
  b/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_0.c
  new file mode 100644
  index 000..2054aa15
  --- /dev/null
  +++ b/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_0.c
  @@ -0,0 +1,17 @@
  +/* { dg-lto-do link } */
  +/* { dg-require-effective-target mpx } */
  +/* { dg-lto-options { { -Ofast -flto -fcheck-pointer-bounds -mmpx } } } 
  */
  +
  +extern int __attribute__((noinline)) f1 (int i);
  +
  +static int __attribute__((noinline))
  +f2 (int i)
  +{
  +  return i + 6;
  +}
  +
  +int
  +main (int argc, char **argv)
  +{
  +  return f1 (argc) + f2 (argc);
  +}
  diff --git a/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_1.c 
  b/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_1.c
  new file mode 100644
  index 000..4fa8656
  --- /dev/null
  +++ b/gcc/testsuite/gcc.dg/lto/chkp-privatize-1_1.c
  @@ -0,0 +1,11 @@
  +static int __attribute__((noinline))
  +f2 (int i)
  +{
  +  return 2 * i;
  +}
  +
  +int __attribute__((noinline))
  +f1 (int i)
  +{
  +  return f2 (i) + 10;
  +}
  diff --git a/gcc/testsuite/gcc.dg/lto/chkp-privatize-2_0.c

Re: [debug-early] fix problem with template parameter packs

2015-05-28 Thread Richard Biener

On Wed, May 27, 2015 at 9:34 PM, Jason Merrill ja...@redhat.com wrote:
 OK, I see the issue.  We're calling debug_abstract_function to build debug
 info for the abstract instance of a function that we already built from
 dwarf2out_early_global_decl.

 It occurs to me that the early-dwarf work should make
 debug_abstract_function and most of the DECL_ABSTRACT handling obsolete.
 All we need to do is set DW_AT_inline during early debug and update it
 during late debug if the function is inlined.

Yes, that was my idea as well.  The early dwarf _is_ the abstract
variant after all (until we annotate it further without going through
another indirection like I did for LTO).

Richard.

 Jason

Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]

2015-05-28 Thread Bernhard Reutner-Fischer

On 3 February 2015 at 01:07, Mike Stump mikest...@comcast.net wrote:
 On Feb 2, 2015, at 3:22 PM, Bernhard Reutner-Fischer rep.dot@gmail.com 
 wrote:
 Untested draft patch

 I looked it over, seems to slot in nicely.


 +   gfc_error ("%<GCC unroll%> directive does not commence a loop at %C");

 So, don’t like commence here.

Does anybody have a better suggestion?

directive not at the start of a loop at %C
directive not followed by a loop at %C

dunno..

Mike, did you tweak the one or two things you got from the reviews
yet? ISTM your
main patch was not OKed yet nor installed.

thanks,

Re: fix pr65369.c testcase

2015-05-28 Thread Richard Biener

On Thu, May 28, 2015 at 7:11 AM, DJ Delorie d...@redhat.com wrote:

 Copied the way other tests get uint32_t.  Ok?

Ok.

 * gcc.c-torture/execute/pr65369.c: Don't assume int is 32 bits.

 Index: gcc.c-torture/execute/pr65369.c
 ===
 --- gcc.c-torture/execute/pr65369.c (revision 223737)
 +++ gcc.c-torture/execute/pr65369.c (working copy)
 @@ -1,7 +1,8 @@
  /* PR tree-optimization/65369 */
 +#include <stdint.h>

  static const char data[] =
"12345678901234567890123456789012345678901234567890"
"123456789012345678901234567890";

  __attribute__ ((noinline))
 @@ -11,13 +12,13 @@ static void foo (const unsigned int *buf
  __builtin_abort ();
  }

  __attribute__ ((noinline))
  static void bar (const unsigned char *block)
  {
 -  unsigned int buf[16];
 +  uint32_t buf[16];
__builtin_memcpy (buf +  0, block +  0, 4);
__builtin_memcpy (buf +  1, block +  4, 4);
__builtin_memcpy (buf +  2, block +  8, 4);
__builtin_memcpy (buf +  3, block + 12, 4);
__builtin_memcpy (buf +  4, block + 16, 4);
__builtin_memcpy (buf +  5, block + 20, 4);

[PATCH v3] libiberty: cleanup Makefile.in

2015-05-28 Thread Bernhard Reutner-Fischer

* configure.ac (TARGETLIB_PIC, TARGETLIB_NOASAN): New variables.
* configure: Regenerate.
* maint-tool: Refactor pic/ and noasan/ handling.
* Makefile.in: Likewise. Regenerate dependencies.

---
The below does the same but attempts to be limited to what POSIX
requires a make(1) to provide. Seems to compile fine with gnu-make and
bmake.

Changes v2 - v3:
- drop unneeded SUB_CFLAGS
- add dependencies on %.c for stamp-pic-ofiles and stamp-noasan-ofiles

Ok for trunk?

Signed-off-by: Bernhard Reutner-Fischer rep.dot@gmail.com
---
 libiberty/Makefile.in  | 2408 +---
 libiberty/configure|8 +
 libiberty/configure.ac |6 +
 libiberty/maint-tool   |   33 +-
 4 files changed, 1487 insertions(+), 968 deletions(-)

diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index f06cc69..ca4e75f 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -68,6 +68,7 @@ MAKEOVERRIDES =
 
 TARGETLIB = ./libiberty.a
 TESTLIB = ./testlib.a
+TARGETLIBS = $(TARGETLIB) @TARGETLIB_PIC@ @TARGETLIB_NOASAN@
 
 LIBOBJS = @LIBOBJS@
 
@@ -102,7 +103,7 @@ FLAGS_TO_PASS = \
 SUBDIRS = testsuite
 
 # FIXME: add @BUILD_INFO@ once we're sure it works for everyone.
-all: stamp-picdir stamp-noasandir $(TARGETLIB) required-list all-subdir
+all: $(TARGETLIBS) required-list all-subdir
@: $(MAKE) ; $(MULTIDO) $(FLAGS_TO_PASS) multi-do DO=all
 
 .PHONY: check installcheck
@@ -244,24 +245,24 @@ INSTALLED_HEADERS =   
  \
$(INCDIR)/timeval-utils.h
 
 $(TARGETLIB): $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS)
-   -rm -f $(TARGETLIB) pic/$(TARGETLIB) noasan/$(TARGETLIB)
-   $(AR) $(AR_FLAGS) $(TARGETLIB) \
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ \
  $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS)
-   $(RANLIB) $(TARGETLIB)
-   if [ x$(PICFLAG) != x ]; then \
- cd pic; \
- $(AR) $(AR_FLAGS) $(TARGETLIB) \
-   $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS); \
- $(RANLIB) $(TARGETLIB); \
- cd ..; \
-   else true; fi; \
-   if [ x$(NOASANFLAG) != x ]; then \
- cd noasan; \
- $(AR) $(AR_FLAGS) $(TARGETLIB) \
-   $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS); \
- $(RANLIB) $(TARGETLIB); \
- cd ..; \
-   else true; fi
+   $(RANLIB) $@
+
+TARGETLIB_PIC_OFILES = `echo $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) | \
+sed -e 's,[^/ ]*/,./pic/,g';`
+./pic/libiberty.a: stamp-pic-ofiles
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ $(TARGETLIB_PIC_OFILES)
+   $(RANLIB) $@
+
+TARGETLIB_NOASAN_OFILES =`echo $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) | 
\
+sed -e 's,[^/ ]*/,./noasan/,g';`
+./noasan/libiberty.a: stamp-noasan-ofiles
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ $(TARGETLIB_NOASAN_OFILES)
+   $(RANLIB) $@
 
 $(TESTLIB): $(REQUIRED_OFILES) $(CONFIGURED_OFILES)
-rm -f $(TESTLIB)
@@ -393,17 +394,15 @@ install_to_tooldir: all
 required-list: Makefile
echo $(REQUIRED_OFILES) > required-list
 
-stamp-picdir:
-   if [ x"$(PICFLAG)" != x ] && [ ! -d pic ]; then \
- mkdir pic; \
-   else true; fi
-   touch stamp-picdir
+stamp-pic-ofiles: $(CFILES:%=$(srcdir)/%)
+   [ -d pic ] && : || mkdir pic
+   $(MAKE) $(FLAGS_TO_PASS) $(TARGETLIB_PIC_OFILES)
+   touch $@
 
-stamp-noasandir:
-   if [ x"$(NOASANFLAG)" != x ] && [ ! -d noasan ]; then \
- mkdir noasan; \
-   else true; fi
-   touch stamp-noasandir
+stamp-noasan-ofiles: $(CFILES:%=$(srcdir)/%)
+   [ -d noasan ] && : || mkdir noasan
+   $(MAKE) $(FLAGS_TO_PASS) $(TARGETLIB_NOASAN_OFILES)
+   touch $@
 
 .PHONY: all etags tags ls clean stage1 stage2
 
@@ -444,7 +443,7 @@ maint-deps :
 mostlyclean: mostlyclean-subdir
-rm -rf *.$(objext) pic noasan core errs \#* *.E a.out
-rm -f errors dummy config.h stamp-*
-   -rm -f $(CONFIG_H) stamp-picdir stamp-noasandir
+   -rm -f $(CONFIG_H)
-rm -f libiberty.aux libiberty.cp libiberty.cps libiberty.fn 
libiberty.ky
-rm -f libiberty.log libiberty.tmp libiberty.tps libiberty.pg
-rm -f libiberty.pgs libiberty.toc libiberty.tp libiberty.tpl 
libiberty.vr
@@ -501,9 +500,6 @@ maintainer-clean-subdir: config.h
  cd $$dir && $(MAKE) $(FLAGS_TO_PASS) $$target; \
done
 
-$(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS): stamp-picdir stamp-noasandir
-$(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
-
 # Don't export variables to the environment, in order to not confuse
 # configure.
 .NOEXPORT:
@@ -511,1200 +507,1708 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
 # The dependencies in the remainder of this file are automatically
 # generated by make maint-deps.  Manual edits will be lost.
 
-./_doprnt.$(objext): $(srcdir)/_doprnt.c config.h $(INCDIR)/ansidecl.h \
-   $(INCDIR)/safe-ctype.h
-   if [ x$(PICFLAG) != x ]; then

[PATCH] auto-wipe dump files, part1, manual stuff

2015-05-28 Thread Bernhard Reutner-Fischer

Ontop of this patch you would have to

git grep -l -E '(cleanup-.*-dump|cleanup-saved-temps)' \
  | egrep -v '(ChangeLog|/lib/)' \
  | sed -e 's|[^/]*$||' | sort | uniq \
  | while read d;
do
  find $d -type f -exec \
sed -i -e '/cleanup-[^-]*[-]*dump/d;/cleanup-saved-temps/d' {} +
done
---

I had to add dg-keep-saved-temps for the following two testcases:
g++.dg/pch/pch.C gcc.dg/pch/save-temps-1.c

The following testcases had
/* { dg-final { cleanup-saved-temps ivopts } } */
resp cleanup-saved-temps pr43597 or [dt][fi]mode_off
which does nothing AFAICT and thus will be removed by above sed.
gcc.target/arm/ivopts-2.c
gcc.target/arm/ivopts-3.c
gcc.target/arm/ivopts-4.c
gcc.target/arm/ivopts-5.c
gcc.target/arm/ivopts.c
gcc.target/arm/pr43597.c
gcc.target/powerpc/dfmode_off.c
gcc.target/powerpc/dimode_off.c
gcc.target/powerpc/tfmode_off.c
gcc.target/powerpc/timode_off.c

I do expect fallout for a couple of days when people push new testcases
and don't have picked up the removal of the procs in their test-runs just yet.

Since you, Mike, OKed the patch in gcc-5 stage-4 for the next stage1, i will
commit this patch (plus the sed in the same revision) in a couple of
hours.

gcc/testsuite/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  al...@gcc.gnu.org

* lib/gcc-dg.exp (cleanup-ipa-dump, cleanup-rtl-dump,
cleanup-tree-dump, cleanup-dump, cleanup-saved-temps): Remove.
Adjust all callers.
(schedule-cleanups, dg-keep-saved-temps): New proc.
(gcc-dg-test-1): Schedule cleanups.
* lib/profopt.exp (profopt-execute): Likewise.
* g++.dg/cdce3.C: Adjust expected line numbers.
* gcc.dg/cdce1.c: Likewise.
* gcc.dg/cdce2.c: Likewise.
* gcc.dg/strlenopt-22.c: Fix comment delimiter.
* gcc.dg/strlenopt-24.c: Likewise.
* gcc.dg/tree-ssa/vrp26.c: Likewise.
* gcc.dg/tree-ssa/vrp28.c: Likewise.
* obj-c++.dg/encode-2.mm: Likewise.

libgomp/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  al...@gcc.gnu.org

* testsuite/libgomp.graphite/bounds.c: Adjust for
cleanup-tree-dump removal.
* testsuite/libgomp.graphite/force-parallel-1.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-2.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-3.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-4.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-5.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-6.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-7.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-8.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-9.c: Likewise.
* testsuite/libgomp.graphite/pr41118.c: Likewise.


gcc/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  al...@gcc.gnu.org

* config/arm/neon-testgen.ml (emit_epilogue): Remove manual call
to cleanup-saved-temps.

gcc/doc/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  al...@gcc.gnu.org

* doc/sourcebuild.texi (Clean up generated test files): Expand
introduction.
(cleanup-ipa-dump, cleanup-rtl-dump, cleanup-tree-dump,
cleanup-saved-temps): Remove.
(dg-keep-saved-temps): Document new proc.


 gcc/config/arm/neon-testgen.ml  |1 -
 gcc/doc/sourcebuild.texi|   30 +++---
 gcc/testsuite/g++.dg/cdce3.C|5 +-
 gcc/testsuite/g++.dg/pch/pch.C  |3 +-
 gcc/testsuite/gcc.dg/cdce1.c|3 +-
 gcc/testsuite/gcc.dg/cdce2.c|3 +-
 gcc/testsuite/gcc.dg/pch/save-temps-1.c |3 +-
 gcc/testsuite/gcc.dg/strlenopt-22.c |3 +-
 gcc/testsuite/gcc.dg/strlenopt-24.c |3 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp26.c   |3 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp28.c   |3 +-
 gcc/testsuite/lib/gcc-dg.exp|  170 +++
 gcc/testsuite/lib/profopt.exp   |3 +
 gcc/testsuite/obj-c++.dg/encode-2.mm|3 +-
 14 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
index 0a2a442..3164ab7 100644
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -138,7 +138,6 @@ let emit_epilogue chan features regexps =
  else
()
 );
-Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n"
 
 (* Check a list of C types to determine which ones are pointers and which
ones are const.  *)
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index c6ef40e..cb41b01 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2210,13 +2210,17 @@ Check branch and/or call counts, in addition to line 
counts, in
 
 @subsubsection Clean up generated test files
 
+Usually the test-framework removes files that were generated during
+testing. If a testcase, for example, uses any dumping mechanism to
+inspect a passes dump file, the

[PATCH] Refactor vectorizer cost model

2015-05-28 Thread Richard Biener


This refactors the vectorizer cost model to call it when everything is
ready, avoiding some fixups.  It also fixes cost compute for
SLP reductions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-05-28  Richard Biener  rguent...@suse.de

* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
member.
(SLP_INSTANCE_BODY_COST_VEC): Remove.
(vect_update_slp_costs_according_to_vf): Likewise.
(vect_slp_analyze_operations): Update prototype.
* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
vect_update_slp_costs_according_to_vf, adjust.
* tree-vect-slp.c (vect_free_slp_instance): Adjust.
(vect_analyze_slp_cost_1): Likewise.
(vect_analyze_slp_cost): Likewise.  Properly deal with
widening reduction ops.  Commit body costs.
(vect_analyze_slp_instance): Adjust.  Do not analyze SLP
cost for loops from here.
(vect_slp_analyze_operations): But do it from here when
the vectorization factor is known and stmts are analyzed.
(vect_bb_vectorization_profitable_p): Simplify.
(vect_slp_analyze_bb_1): Do not compute SLP cost here.
(vect_update_slp_costs_according_to_vf): Remove.


Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223743)
+++ gcc/tree-vect-loop.c(working copy)
@@ -1814,15 +1855,12 @@ vect_analyze_loop_2 (loop_vec_info loop_
  /* Update the vectorization factor based on the SLP decision.  */
  vect_update_vf_for_slp (loop_vinfo);
 
- /* Once VF is set, SLP costs should be updated since the number of
-created vector stmts depends on VF.  */
- vect_update_slp_costs_according_to_vf (loop_vinfo);
-
  /* Analyze operations in the SLP instances.  Note this may
 remove unsupported SLP instances which makes the above
 SLP kind detection invalid.  */
  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
- vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+ vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+  LOOP_VINFO_TARGET_COST_DATA 
(loop_vinfo));
  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
return false;
}
Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c (revision 223743)
+++ gcc/tree-vect-slp.c (working copy)
@@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance ins
 {
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
   SLP_INSTANCE_LOADS (instance).release ();
-  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
   free (instance);
 }
 
@@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_t
 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-slp_instance instance, slp_tree node,
+vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
 stmt_vector_for_cost *prologue_cost_vec,
+stmt_vector_for_cost *body_cost_vec,
 unsigned ncopies_for_cost)
 {
-  stmt_vector_for_cost *body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
-
   unsigned i;
   slp_tree child;
   gimple stmt, s;
@@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
 if (child)
-  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
-  instance, child, prologue_cost_vec,
-  ncopies_for_cost);
+  vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
+  body_cost_vec, ncopies_for_cost);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
   enum vect_def_type dt;
   if (!op || op == lhs)
continue;
-  if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+  if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
+ STMT_VINFO_BB_VINFO (stmt_info),
  &def_stmt, &def, &dt))
{
  /* Without looking at the actual initializer a vector of
@@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info l
 /* Compute the cost for the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-  slp_instance instance, unsigned nunits)
+vect_analyze_slp_cost (slp_instance instance, void *data)
 {
   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;

Commit: RX: Better use of PUSHM and POPM

2015-05-28 Thread Nick Clifton

Hi Guys,

  I am applying the patch below to enhance the RX backend so that it
  will push and pop multiple groups of registers using the PUSHM and
  POPM instructions, thus reducing code size and increasing
  performance.

Cheers
  Nick

gcc/ChangeLog
2015-05-28  Nick Clifton  ni...@redhat.com

* config/rx/rx.c (push_regs): New function.  Extracts code from...
(rx_expand_prologue): ... here.  Use push_regs to push even small
spans of registers.
(pop_regs): New function.
(rx_expand_epilogue):  Use pop_regs to pop even small spans of
registers.

Index: gcc/config/rx/rx.c
===
--- gcc/config/rx/rx.c  (revision 223737)
+++ gcc/config/rx/rx.c  (working copy)
@@ -1567,6 +1567,10 @@
  has specified --fixed-reg-name on the command line and in such
  circumstances we do not want to touch the fixed registers at all.
 
+ Note also that the code in the prologue/epilogue handlers will
+ automatically merge multiple PUSHes of adjacent registers into a single
+ PUSHM.
+
  FIXME: Is it worth improving this heuristic ?  */
   pushed_mask = (-1 << low) & ~(-1 << (high + 1));
   unneeded_pushes = (pushed_mask & (~ save_mask)) & pushed_mask;
@@ -1716,6 +1720,19 @@
   return;
 }
 
+static void
+push_regs (unsigned int high, unsigned int low)
+{
+  rtx insn;
+
+  if (low == high)
+insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
+  else
+insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1) * 
UNITS_PER_WORD),
+  gen_rx_store_vector (low, high)));
+  mark_frame_related (insn);
+}
+
 void
 rx_expand_prologue (void)
 {
@@ -1725,7 +1742,6 @@
   unsigned int low;
   unsigned int high;
   unsigned int reg;
-  rtx insn;
 
   /* Naked functions use their own, programmer provided prologues.  */
   if (is_naked_func (NULL_TREE))
@@ -1735,7 +1751,7 @@
 
   if (flag_stack_usage_info)
 current_function_static_stack_size = frame_size + stack_size;
-
+  
   /* If we use any of the callee-saved registers, save them now.  */
   if (mask)
 {
@@ -1743,20 +1759,25 @@
   for (reg = CC_REGNUM; reg --;)
if (mask & (1 << reg))
  {
-   insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg)));
-   mark_frame_related (insn);
+   low = high = reg;
+
+   /* Look for a span of registers.
+  Note - we do not have to worry about -Os and whether
+  it is better to use a single, longer PUSHM as
+  rx_get_stack_layout has already done that for us.  */
+   while (reg-- > 0)
+ if ((mask & (1 << reg)) == 0)
+   break;
+ else
+   --low;
+
+   push_regs (high, low);
+   if (reg == (unsigned) -1)
+ break;
  }
 }
   else if (low)
-{
-  if (high == low)
-   insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
-  else
-   insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1)
-   * UNITS_PER_WORD),
-  gen_rx_store_vector (low, high)));
-  mark_frame_related (insn);
-}
+push_regs (high, low);
 
   if (MUST_SAVE_ACC_REGISTER)
 {
@@ -2031,6 +2052,16 @@
   low == 0);
 }
 
+static void
+pop_regs (unsigned int high, unsigned int low)
+{
+  if (high == low)
+emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
+  else
+emit_insn (gen_stack_popm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD),
+  gen_rx_popm_vector (low, high)));
+}
+
 void
 rx_expand_epilogue (bool is_sibcall)
 {
@@ -2143,16 +2174,16 @@
{
  for (reg = 0; reg < CC_REGNUM; reg ++)
 if (register_mask & (1 << reg))
- emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg)));
+ {
+   low = high = reg;
+   while (register_mask & (1 << high))
+ high ++;
+   pop_regs (high - 1, low);
+   reg = high;
+ }
}
   else if (low)
-   {
- if (high == low)
-   emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
- else
-   emit_insn (gen_stack_popm (GEN_INT (regs_size),
-  gen_rx_popm_vector (low, high)));
-   }
+   pop_regs (high, low);
 
   if (is_fast_interrupt_func (NULL_TREE))
{

Re: [patch] libjava signal handling for FreeBSD (amd64/i386)

2015-05-28 Thread Andrew Haley

On 27/05/15 20:53, Andreas Tobler wrote:
 Is this ok for trunk?

Excellent, thanks.

Andrew.

[Ada] Speed improvements for controlled types

2015-05-28 Thread Arnaud Charlet

This patch changes the implementation of controlled types so that in simple
cases, they are just as efficient as noncontrolled types where initialization
and cleanup is done by hand.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-27  Bob Duff  d...@adacore.com

* exp_ch3.adb (Build_Array_Init_Proc, Build_Record_Init_Proc):
Inline init_procs when the type has controlled parts. Remove
obsolete comments about those init_procs -- init_procs for
such types are no longer complex. A typical init_proc just
initializes the 'Tag field, and calls the parent init_proc
(e.g. for Limited_Controlled), which calls the grandparent
(for Root_Controlled), which does nothing. This all boils down
to one instruction when inlined.
* exp_ch7.adb (Create_Finalizer): Inline the finalizer.

Index: exp_ch7.adb
===
--- exp_ch7.adb (revision 223752)
+++ exp_ch7.adb (working copy)
@@ -1440,6 +1440,13 @@
 --  resides, there is no need for elaboration checks.
 
 Set_Kill_Elaboration_Checks (Fin_Id);
+
+--  Inlining the finalizer produces a substantial speedup at -O2.
+--  It is inlined by default at -O3. Either way, it is called
+--  exactly twice (once on the normal path, and once for
+--  exceptions/abort), so this won't bloat the code too much.
+
+Set_Is_Inlined  (Fin_Id);
  end if;
 
  --  Step 2: Creation of the finalizer specification
Index: exp_ch3.adb
===
--- exp_ch3.adb (revision 223754)
+++ exp_ch3.adb (working copy)
@@ -311,7 +311,7 @@
--  Predefined_Primitive_Bodies.
 
function Has_New_Non_Standard_Rep (T : Entity_Id) return Boolean;
-   --  returns True if there are representation clauses for type T that are not
+   --  Returns True if there are representation clauses for type T that are not
--  inherited. If the result is false, the init_proc and the discriminant
--  checking functions of the parent can be reused by a derived type.
 
@@ -761,14 +761,12 @@
 Set_Debug_Info_Off (Proc_Id);
  end if;
 
- --  Set inlined unless controlled stuff or tasks around, in which
- --  case we do not want to inline, because nested stuff may cause
- --  difficulties in inter-unit inlining, and furthermore there is
- --  in any case no point in inlining such complex init procs.
+ --  Set inlined unless tasks are around, in which case we do not
+ --  want to inline, because nested stuff may cause difficulties in
+ --  inter-unit inlining, and furthermore there is in any case no
+ --  point in inlining such complex init procs.
 
- if not Has_Task (Proc_Id)
-   and then not Needs_Finalization (Proc_Id)
- then
+ if not Has_Task (Proc_Id) then
 Set_Is_Inlined (Proc_Id);
  end if;
 
@@ -3619,14 +3617,10 @@
  --  The initialization of protected records is not worth inlining.
  --  In addition, when compiled for another unit for inlining purposes,
  --  it may make reference to entities that have not been elaborated
- --  yet. The initialization of controlled records contains a nested
- --  clean-up procedure that makes it impractical to inline as well,
- --  and leads to undefined symbols if inlined in a different unit.
- --  Similar considerations apply to task types.
+ --  yet. Similar considerations apply to task types.
 
  if not Is_Concurrent_Type (Rec_Type)
and then not Has_Task (Rec_Type)
-   and then not Needs_Finalization (Rec_Type)
  then
 Set_Is_Inlined  (Proc_Id);
  end if;

[Ada] Visibility error of selected component in instance body

2015-05-28 Thread Arnaud Charlet

This patch fixes a spurious visibility error on a selected component in an
instance body, when the type of the prefix of the selected component is an
actual of the instance, and the desired component is inherited through
one or more derivations.

The package derived.ads below must compile quietly:

--
package AST is

   type AST_Node_Type is abstract tagged private;
   type AST_Node_Access is access AST_Node_Type;
   type AST_Node is access all AST_Node_Type'Class;

   procedure Compute_Indent_Level (Node : access AST_Node_Type) is abstract;

private

   type AST_Node_Type is abstract tagged record
  Indent_Level : Natural;
   end record;

end AST;
---
generic
   type Node_Type is abstract new AST_Node_Type with private;
   type Node is access all Node_Type'Class;
package AST.List is

   type List_Type is new AST_Node_Type with record
  N : Node;
   end record;

   overriding
   procedure Compute_Indent_Level (Node : access List_Type);

end AST.List;
package body AST.List is

   overriding
   procedure Compute_Indent_Level (Node : access List_Type) is
   begin
  Node.N.Indent_Level := Node.Indent_Level;
   end Compute_Indent_Level;

end AST.List;
---
with AST; use AST;
with AST.List;

package Derived is

   type Derived_Type is abstract new AST_Node_Type with null record;
   type Derived is access all Derived_Type'Class;

   package Lists is new AST.List
 (Node_Type = Derived_Type,
  Node  = Derived);

end Derived;

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-27  Ed Schonberg  schonb...@adacore.com

* sem_ch4.adb (Analyze_Selected_Component): If the type to use
is a derived type and is a generic actual, the selected component
appears within an instance body, and the check over the type
has failed, examine ancestor types for the desired component.
(Find_Component_In_Instance): If record type is a derived type,
examine all ancestors in order to locate desired component.

Index: sem_ch4.adb
===
--- sem_ch4.adb (revision 223754)
+++ sem_ch4.adb (working copy)
@@ -4102,7 +4102,8 @@
   --  searches have failed. If a match is found, the Etype of both N and
   --  Sel are set from this component, and the entity of Sel is set to
   --  reference this component. If no match is found, Entity (Sel) remains
-  --  unset.
+  --  unset. For a derived type that is an actual of the instance, the
+  --  desired component may be found in any ancestor.
 
   function Has_Mode_Conformant_Spec (Comp : Entity_Id) return Boolean;
   --  It is known that the parent of N denotes a subprogram call. Comp
@@ -4117,18 +4118,36 @@
 
   procedure Find_Component_In_Instance (Rec : Entity_Id) is
  Comp : Entity_Id;
+ Typ  : Entity_Id;
 
   begin
- Comp := First_Component (Rec);
- while Present (Comp) loop
-if Chars (Comp) = Chars (Sel) then
-   Set_Entity_With_Checks (Sel, Comp);
-   Set_Etype (Sel, Etype (Comp));
-   Set_Etype (N,   Etype (Comp));
+ Typ := Rec;
+ while Present (Typ) loop
+Comp := First_Component (Typ);
+while Present (Comp) loop
+   if Chars (Comp) = Chars (Sel) then
+  Set_Entity_With_Checks (Sel, Comp);
+  Set_Etype (Sel, Etype (Comp));
+  Set_Etype (N,   Etype (Comp));
+  return;
+   end if;
+
+   Next_Component (Comp);
+end loop;
+
+--  If not found, the component may be declared in the parent
+--  type or its full view, if any.
+
+if Is_Derived_Type (Typ) then
+   Typ := Etype (Typ);
+
+   if Is_Private_Type (Typ) then
+  Typ := Full_View (Typ);
+   end if;
+
+else
return;
 end if;
-
-Next_Component (Comp);
  end loop;
 
  --  If we fall through, no match, so no changes made
@@ -4789,6 +4808,18 @@
  Par := Etype (Par);
   end loop;
 
+   --  Another special case: the type is an extension of a private
+   --  type T, is an actual in an instance, and we are in the body
+   --  of the instance, so the generic body had a full view of the
+   --  type declaration for T or of some ancestor that defines the
+   --  component in question.
+
+   elsif Is_Derived_Type (Type_To_Use)
+ and then Used_As_Generic_Actual (Type_To_Use)
+ and then In_Instance_Body
+   then
+  Find_Component_In_Instance (Parent_Subtype (Type_To_Use));
+
--  In ASIS mode the generic parent type may be absent. Examine
--  the parent type directly for a component that may have been

Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread David Malcolm

On Wed, 2015-05-27 at 15:56 +0200, mliska wrote:
 gcc/ChangeLog:
 
 2015-04-30  Martin Liska  mli...@suse.cz
 
   * ira-color.c (init_update_cost_records): Use new type-based pool 
 allocator.
   (get_update_cost_record): Likewise.
   (free_update_cost_record_list): Likewise.
   (finish_update_cost_records): Likewise.
   (initiate_cost_update): Likewise.
 ---
  gcc/ira-color.c | 19 +--
  1 file changed, 5 insertions(+), 14 deletions(-)
 
 diff --git a/gcc/ira-color.c b/gcc/ira-color.c
 index 4750714..4aec98e 100644
 --- a/gcc/ira-color.c
 +++ b/gcc/ira-color.c
 @@ -1166,16 +1166,8 @@ setup_profitable_hard_regs (void)
 allocnos.  */
  
  /* Pool for update cost records.  */
 -static alloc_pool update_cost_record_pool;
 -
 -/* Initiate update cost records.  */
 -static void
 -init_update_cost_records (void)
 -{
 -  update_cost_record_pool
 -= create_alloc_pool (update cost records,
 -  sizeof (struct update_cost_record), 100);
 -}
 +static pool_allocatorupdate_cost_record update_cost_record_pool
 +  (update cost records, 100);

Am I right in thinking that this is a statically-allocated object with a
non-trivial constructor?  i.e. that this constructor has to run before
main is entered?

Do our coding guidelines allow for this?  (I've been burned by this
before, on a buggy C++ runtime that didn't manage to support these).
I'm a little nervous about this, touching global state before
main (e.g. from the point-of-view of the JIT), though I don't know yet
if this is just a gut reaction, or if there's a valid concern here (I'm
officially on holiday this week, so I haven't had a chance to dig deeply
into these patches yet, sorry).

[...snip...]

 @@ -1264,7 +1256,6 @@ initiate_cost_update (void)
  = (struct update_cost_queue_elem *) ira_allocate (size);
memset (update_cost_queue_elems, 0, size);
update_cost_check = 0;
 -  init_update_cost_records ();
  }

(for reference, this is where the manually-coded initialization call was
made)

Hope this is constructive
Dave

Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Christophe Lyon

On 28 May 2015 at 12:22, Alan Lawrence alan.lawre...@arm.com wrote:
 Christophe Lyon wrote:

 On 26 May 2015 at 18:25, Alan Lawrence alan.lawre...@arm.com wrote:

 I don't see this symptom - I am able to execute such subsets with either
 my,
 or Sandra's, advsimd-intrinsics.exp.


 I didn't try to run with your patch, I thought it was an oversight of
 yours.

 Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.

 Is it that you have to check runtest_file_p because you are setting
 gcc_parallel_test_enable to 0?

 I'm doing more testing now, but I think I can drop my
 advsimd-intrinsics.exp
 changes altogether; I'll post an updated patch series shortly.

 In the meantime I'm curious as to why you found the
 gcc_parallel_test_enable
 necessary? (And is it safe to reset it to 1 afterwards, rather than to a
 saved value?)

 See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html


 So after working through the differences between Sandra's and my patch, I
 find the existing advsimd-intrinsics.exp achieves pretty much the same
 thing, and preserves the same list of test variants (e.g. the -Og -g from
 set-torture-options which I had removed).

 However, I've tried testing advsimd-intrinsics.exp (both the whole thing,
 and individual tests using RUNTESTFLAGS) with and without this hunk:

 @@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
 $LTO_TORTURE_OPTI
  set additional_flags [add_options_for_arm_neon ]

  # Main loop.
 -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
 -# If we're only testing specific files and this isn't one of them, skip
 it.
 -if ![runtest_file_p $runtests $src] then {
 -   continue
 -}
 -
 -# runtest_file_p is already run above, and the code below can run
 -# runtest_file_p again, make sure everything for this test is
 -# performed if the above runtest_file_p decided this runtest
 -# instance should execute the test
 -gcc_parallel_test_enable 0
 -gcc-dg-runtest $src  $additional_flags
 -gcc_parallel_test_enable 1
 -}
 +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] 
 ${additional_flags}

 and find exactly the same tests are run and pass. My hypothesis is thus that
 you only need the explicit loop, manual checking of runtest_file_p, and
 gcc_parallel_test_enable, in order to do *both* c-torture-execute *and*
 gcc-dg-runtest; since we are now only doing the latter, this is unnecessary.
 Does that make sense? (If you agree, I'll propose that as a standalone
 cleanup patch.)


Indeed I think you are right. Since we no longer call
c-torture-execute, we no longer need to call runtest_file_p here.
Having only one remaining call to runtest_file_p in gcc-dg-runtest is
parallel-safe. Thanks for the cleanup.

Christophe.

 Cheers, Alan

[gomp4, committed] Remove superfluous main in kernels-loop-n.c

2015-05-28 Thread Tom de Vries


Hi,

this patch removes a superfluous main function from a test-case.

Committed.

Thanks,
- Tom
Remove superfluous main in kernels-loop-n.c

2015-05-27  Tom de Vries  t...@codesourcery.com

	* c-c++-common/goacc/kernels-loop-n.c (main): Remove.

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 7bf744e..5f7c1df6 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -7,7 +7,7 @@
 #define N ((1024 * 512) + 1)
 #define COUNTERTYPE unsigned int
 
-static int __attribute__((noinline,noclone))
+int
 foo (COUNTERTYPE n)
 {
   unsigned int *__restrict a;
@@ -41,8 +41,3 @@ foo (COUNTERTYPE n)
   return 0;
 }
 
-int
-main (void)
-{
-  return foo (N);
-}
-- 
1.9.1

[gomp4, committed] Enable parallelization of kernels-loop-n.c

2015-05-28 Thread Tom de Vries


Hi,

I've committed this patch.

It enables parallelization of the kernels-loop-n.c testcase.

This is now possible, due to the commit of the fix for PR65637 to the 
gomp-4_0-branch ( 
https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01569.html ).


Thanks,
- Tom
Enable parallelization of kernels-loop-n.c

2015-05-27  Tom de Vries  t...@codesourcery.com

	* tree-parloops.c (parallelize_loops): Remove checks limiting type of
	loops allowed.

	* c-c++-common/goacc/kernels-loop-n.c: Check for parallelization.

diff --git a/gcc/testsuite/ChangeLog.gomp b/gcc/testsuite/ChangeLog.gomp
index a5f1167..9b657fb 100644
--- a/gcc/testsuite/ChangeLog.gomp
+++ b/gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,9 @@
 2015-05-27  Tom de Vries  t...@codesourcery.com
 
+	* c-c++-common/goacc/kernels-loop-n.c: Check for parallelization.
+
+2015-05-27  Tom de Vries  t...@codesourcery.com
+
 	* c-c++-common/goacc/kernels-loop-n.c (main): Remove.
 
 2015-05-15  Cesar Philippidis  ce...@codesourcery.com
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 5f7c1df6..d227786 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -1,6 +1,7 @@
 /* { dg-additional-options -O2 } */
 /* { dg-additional-options -ftree-parallelize-loops=32 } */
-/* TODO: parallelize this example.  */
+/* { dg-additional-options -fdump-tree-parloops_oacc_kernels-all } */
+/* { dg-additional-options -fdump-tree-optimized } */
 
 #include stdlib.h
 
@@ -41,3 +42,14 @@ foo (COUNTERTYPE n)
   return 0;
 }
 
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times SUCCESS: may be parallelized 1 parloops_oacc_kernels } } */
+/* { dg-final { scan-tree-dump-not FAILED: parloops_oacc_kernels } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times (?n);; Function .*foo.*._omp_fn.0 1 optimized } } */
+
+/* { dg-final { scan-tree-dump-times (?n)pragma omp target oacc_parallel.*num_gangs\\(32\\) 1 parloops_oacc_kernels } } */
+
+/* { dg-final { cleanup-tree-dump parloops_oacc_kernels } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f5bc8b3..e10179d 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2273,24 +2273,6 @@ parallelize_loops (bool oacc_kernels_p)
 	  if (loop-inner)
 	continue;
 
-	  gcc_assert (single_succ_p (region_entry));
-	  basic_block first = single_succ (region_entry);
-
-	  /* TODO: Allow conditional loop entry.  This test triggers when the
-	 loop bound is not known at compile time.  */
-	  if (!single_succ_p (first))
-	continue;
-
-	  /* TODO: allow more complex loops.  */
-	  if (single_exit (loop) == NULL)
-	continue;
-
-	  /* TODO: Allow other code than a single loop inside a kernels
-	 region.  */
-	  if (loop-header != single_succ (first)
-	  || single_exit (loop)-dest != region_exit)
-	continue;
-
 	  if (dump_file  (dump_flags  TDF_DETAILS))
 	fprintf (dump_file,
 		 Trying loop %d with header bb %d in oacc kernels region\n,
-- 
1.9.1

Re: [Patch AArch64] PR target/66200 - gcc / libstdc++ TLC for weak memory models.

2015-05-28 Thread James Greenhalgh

On Wed, May 20, 2015 at 02:58:09PM +0100, Ramana Radhakrishnan wrote:
 Hi,
 
   Someone privately pointed out that the ARM and AArch64 ports do not 
 define TARGET_RELAXED_ORDERING given that the architecture(s) mandates a 
 weak memory model. This patch fixes it for AArch64, the ARM patch 
 follows in due course after appropriate testing.
 
 I will also note that we can define __test_and_acquire as well as 
 __set_and_release and I'm toying with a follow-up patch for the same.
 
 Also it may make sense to consider changing the defaults to a safer 
 form, or indeed forcing ports to define some of this rather than 
 allowing for silent wrong code issues. However I'm not about to do so in 
 the context of this patch.
 
 Bootstrapped and regression tested on aarch64-none-linux-gnu with no 
 regressions.
 
 Ok to apply to trunk and all release branches ?
 
 gcc/
 
 PR target/66200
 
 * config/aarch64/aarch64.c (TARGET_RELAXED_ORDERING): Define
 
 libstdc++-v3/
 
 PR target/66200
 
 * configure.host (host_cpu): Add aarch64 case.
 * config/cpu/aarch64/atomic_word.h: New file

This is OK for trunk and looks serious enough for backport to the
release branches.

Though,

 +// This is necessary iff TARGET_RELAXED_ORDERING is defined in tm.h.

this comment had me looking for uses of TARGET_RELAXED_ORDERING as a macro
rather than a hook, and left... But fixing that (here and in the
template) is not essential for this patch, or for backporting.

Thanks,
James

 P.S.  It's interesting to note that ia64 doesn't define the barriers 
 which appear to be used in a number of other places than just the 
 constructor guard functions (probably wrongly on the assumption that one 
 doesn't need the barriers elsewhere). I suspect other architectures like 
 MIPS may also be affected by this.

 commit 414345c424fa020717c6c3083089cd987f3032db
 Author: Ramana Radhakrishnan ramana.radhakrish...@arm.com
 Date:   Wed May 20 13:55:44 2015 +0100
 
 Add relaxed memory ordering cases.
 
 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
 index 7f0cc0d..273aa06 100644
 --- a/gcc/config/aarch64/aarch64.c
 +++ b/gcc/config/aarch64/aarch64.c
 @@ -11644,6 +11644,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
  #undef TARGET_SCHED_FUSION_PRIORITY
  #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
  
 +#undef TARGET_RELAXED_ORDERING
 +#define TARGET_RELAXED_ORDERING true
 +
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include gt-aarch64.h
 diff --git a/libstdc++-v3/config/cpu/aarch64/atomic_word.h 
 b/libstdc++-v3/config/cpu/aarch64/atomic_word.h
 new file mode 100644
 index 000..4afe6ed
 --- /dev/null
 +++ b/libstdc++-v3/config/cpu/aarch64/atomic_word.h
 @@ -0,0 +1,44 @@
 +// Low-level type for atomic operations -*- C++ -*-
 +
 +// Copyright (C) 2015 Free Software Foundation, Inc.
 +//
 +// This file is part of the GNU ISO C++ Library.  This library is free
 +// software; you can redistribute it and/or modify it under the
 +// terms of the GNU General Public License as published by the
 +// Free Software Foundation; either version 3, or (at your option)
 +// any later version.
 +
 +// This library is distributed in the hope that it will be useful,
 +// but WITHOUT ANY WARRANTY; without even the implied warranty of
 +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +// GNU General Public License for more details.
 +
 +// Under Section 7 of GPL version 3, you are granted additional
 +// permissions described in the GCC Runtime Library Exception, version
 +// 3.1, as published by the Free Software Foundation.
 +
 +// You should have received a copy of the GNU General Public License and
 +// a copy of the GCC Runtime Library Exception along with this program;
 +// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 +// http://www.gnu.org/licenses/.
 +
 +/** @file atomic_word.h
 + *  This file is a GNU extension to the Standard C++ Library.
 + */
 +
 +#ifndef _GLIBCXX_ATOMIC_WORD_H
 +#define _GLIBCXX_ATOMIC_WORD_H   1
 +
 +
 +typedef int _Atomic_word;
 +
 +// This one prevents loads from being hoisted across the barrier;
 +// in other words, this is a Load-Load acquire barrier.
 +// This is necessary iff TARGET_RELAXED_ORDERING is defined in tm.h.
 +#define _GLIBCXX_READ_MEM_BARRIER __asm __volatile (dmb ishld:::memory)
 +
 +// This one prevents stores from being sunk across the barrier; in other
 +// words, a Store-Store release barrier.
 +#define _GLIBCXX_WRITE_MEM_BARRIER __asm __volatile (dmb ishst:::memory)
 +
 +#endif
 diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
 index a349ce3..42a45d9 100644
 --- a/libstdc++-v3/configure.host
 +++ b/libstdc++-v3/configure.host
 @@ -153,6 +153,9 @@ esac
  # Most can just use generic.
  # THIS TABLE IS SORTED.  KEEP IT THAT WAY.
  case ${host_cpu} in
 +  aarch64*)
 +atomic_word_dir=cpu/aarch64
 +;;
alpha*)
  atomic_word_dir=cpu/alpha
  ;;

Re: [PATCH] Fix PR66142

2015-05-28 Thread Richard Biener

On Wed, 27 May 2015, Kyrill Tkachov wrote:

 Hi Richard,
 
 On 26/05/15 14:54, Richard Biener wrote:
  The following fixes the testcase in PR66142
  
  Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
  
  Richard.
  
  2015-05-26  Richard Biener  rguent...@suse.de
  
  PR tree-optimization/66142
  * tree-ssa-sccvn.c (vn_reference_lookup_3): Manually compare
  MEM_REFs for the same base address.
  
  * gcc.dg/tree-ssa/ssa-fre-44.c: New testcase.
  
  Index: gcc/tree-ssa-sccvn.c
  ===
  --- gcc/tree-ssa-sccvn.c(revision 223574)
  +++ gcc/tree-ssa-sccvn.c(working copy)
  @@ -1894,7 +1894,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
  size2 = lhs_ref.size;
  maxsize2 = lhs_ref.max_size;
  if (maxsize2 == -1
  - || (base != base2  !operand_equal_p (base, base2, 0))
  + || (base != base2
  +  (TREE_CODE (base) != MEM_REF
  + || TREE_CODE (base2) != MEM_REF
  + || TREE_OPERAND (base, 0) != TREE_OPERAND (base2, 0)
  + || !tree_int_cst_equal (TREE_OPERAND (base, 1),
  + TREE_OPERAND (base2, 1
|| offset2  offset
|| offset2 + size2  offset + maxsize)
  return (void *)-1;
  Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
  ===
  --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (revision 0)
  +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (working copy)
  @@ -0,0 +1,62 @@
  +/* { dg-do compile } */
  +/* { dg-options -O -fdump-tree-fre1 } */
  +
  +struct A { float x, y; };
  +struct B { struct A u; };
  +void bar (struct A *);
  +
  +float
  +f1 (struct B *x, int y)
  +{
  +  struct A p;
  +  p.x = 1.0f;
  +  p.y = 2.0f;
  +  struct A *q = x[y].u;
  +  *q = p;
  +  float f = x[y].u.x + x[y].u.y;
  +  bar (p);
  +  return f;
  +}
  +
  +float
  +f2 (struct B *x, int y)
  +{
  +  struct A p;
  +  p.x = 1.0f;
  +  p.y = 2.0f;
  +  x[y].u = p;
  +  float f = x[y].u.x + x[y].u.y;
  +  bar (p);
  +  return f;
  +}
  +
  +float
  +f3 (struct B *x, int y)
  +{
  +  struct A p;
  +  p.x = 1.0f;
  +  p.y = 2.0f;
  +  struct A *q = x[y].u;
  +  __builtin_memcpy (q-x, p.x, sizeof (float));
  +  __builtin_memcpy (q-y, p.y, sizeof (float));
  +  *q = p;
  +  float f = x[y].u.x + x[y].u.y;
  +  bar (p);
  +  return f;
  +}
  +
  +float
  +f4 (struct B *x, int y)
  +{
  +  struct A p;
  +  p.x = 1.0f;
  +  p.y = 2.0f;
  +  __builtin_memcpy (x[y].u.x, p.x, sizeof (float));
  +  __builtin_memcpy (x[y].u.y, p.y, sizeof (float));
  +  float f = x[y].u.x + x[y].u.y;
  +  bar (p);
  +  return f;
  +}
 
 I see this test failing on arm-none-eabi. In particular, the f4 dump is the
 only one
 that doesn't contain return 3.0. Instead it is:
 f4 (struct B * x, int y)
 {
   float f;
   struct A p;
   unsigned int y.3_5;
   unsigned int _6;
   struct B * _8;
   float * _9;
   float * _14;
   float _19;
   float _23;
 
   bb 2:
   p.x = 1.0e+0;
   p.y = 2.0e+0;
   y.3_5 = (unsigned int) y_4(D);
   _6 = y.3_5 * 8;
   _8 = x_7(D) + _6;
   _9 = _8-u.x;
   __builtin_memcpy (_9, p.x, 4);
   _14 = _8-u.y;
   __builtin_memcpy (_14, p.y, 4);
   _19 = _8-u.x;
   _23 = _8-u.y;
   f_24 = _19 + _23;
   bar (p);
   p ={v} {CLOBBER};
   return f_24;
 
 }
 
 Thanks,
 Kyrill

Thanks - the following patch fixes this (tested with a cross).  It
also removes a spurious aggregate assignment from f3 which makes
it fail without the patch as well (as expected).

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2015-05-28  Richard Biener  rguent...@suse.de

PR tree-optimization/66142
* tree-ssa-sccvn.c (vn_reference_lookup_3): Handle non-GIMPLE
values better in memcpy destination handling.  Handle non-aliasing
we discover here.

* gcc.dg/tree-ssa/ssa-fre-44.c: Fixup.

Index: gcc/tree-ssa-sccvn.c
===
*** gcc/tree-ssa-sccvn.c(revision 223802)
--- gcc/tree-ssa-sccvn.c(working copy)
*** vn_reference_lookup_3 (ao_ref *ref, tree
*** 2028,2034 
lhs = gimple_call_arg (def_stmt, 0);
lhs_offset = 0;
if (TREE_CODE (lhs) == SSA_NAME)
!   lhs = SSA_VAL (lhs);
if (TREE_CODE (lhs) == ADDR_EXPR)
{
  tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (lhs, 0),
--- 2028,2043 
lhs = gimple_call_arg (def_stmt, 0);
lhs_offset = 0;
if (TREE_CODE (lhs) == SSA_NAME)
!   {
! lhs = SSA_VAL (lhs);
! if (TREE_CODE (lhs) == SSA_NAME)
!   {
! gimple def_stmt = SSA_NAME_DEF_STMT (lhs);
! if (gimple_assign_single_p (def_stmt)
!  gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
!   lhs = gimple_assign_rhs1 (def_stmt);
!   }
!   }
if (TREE_CODE (lhs) ==

Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Thomas Schwinge

Hi!

On Fri, 15 May 2015 16:38:40 +0200, Martin Liška mli...@suse.cz wrote:
 Following patch attempts to rewrite memory reports for GCC's internal 
 allocations
 [...]

(Got committed to trunk in r223748.)

   * hash-map-traits.h: New file.

In that one you added a copyright/licensing header, but...

   * mem-stats-traits.h: New file.
   * mem-stats.h: New file.

... in these two you didn't (but should):

 --- /dev/null
 +++ b/gcc/hash-map-traits.h
 @@ -0,0 +1,104 @@
 +/* A hash map traits.
 +   Copyright (C) 2015 Free Software Foundation, Inc.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify it under
 +the terms of [...]

 --- /dev/null
 +++ b/gcc/mem-stats-traits.h
 @@ -0,0 +1,20 @@
 +#ifndef GCC_MEM_STATS_TRAITS_H
 +#define GCC_MEM_STATS_TRAITS_H
 +
 +/* Memory allocation origin.  */
 +enum mem_alloc_origin
 +{
 +  HASH_TABLE,
 +  HASH_MAP,
 +  HASH_SET,
 +  VEC,
 +  BITMAP,
 +  GGC,
 +  MEM_ALLOC_ORIGIN_LENGTH
 +};
 +
 +/* Verbose names of the memory allocation origin.  */
 +static const char * mem_alloc_origin_names[] = { Hash tables, Hash maps, 
 Hash sets,
 +  Heap vectors, Bitmaps, GGC memory };
 +
 +#endif // GCC_MEM_STATS_TRAITS_H

 --- /dev/null
 +++ b/gcc/mem-stats.h
 @@ -0,0 +1,535 @@
 +#ifndef GCC_MEM_STATS_H
 +#define GCC_MEM_STATS_H
 +
 +#include hash-map-traits.h
 +[...]


Grüße,
 Thomas


signature.asc
Description: PGP signature

Re: [Patch V2]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold

Hello,

 #define BACKTRACE_SUPPORTS_THREADS @BACKTRACE_SUPPORTS_THREADS@
 +
 +/* BACKTRACE_SUPPORTS_DATA will be #defined'd as 1 if the backtrace library
 +   also handles data symbols, 0 if not.  */
 +
 +#define BACKTRACE_SUPPORTS_DATA @BACKTRACE_SUPPORTS_DATA@
 
 End users are expected to read and understand this file, so I think
 this comment is too obscure.  I suggest:
 
 BACKTRACE_SUPPORTS_DATA will be #define'd as 1 if backtrace_syminfo
 will work for variables.  It will always work for functions.

The comment is now replaced by your wording.

 I would have thought you could distinguish relevant symbols using the
 storage class and type.  But perhaps not.

Not that easily, unfortunately.  Section names also look like data
symbols, and furthermore linker script symbols made btest fail.
But we could revisit this issue later.

 diff --git a/libbacktrace/filetype.awk b/libbacktrace/filetype.awk
 index 0a656f7..37099ad 100644
 --- a/libbacktrace/filetype.awk
 +++ b/libbacktrace/filetype.awk
 @@ -1,3 +1,4 @@
 # An awk script to determine the type of a file.
 /\177ELF\001/ { if (NR == 1) { print elf32; exit } }
 /\177ELF\002/ { if (NR == 1) { print elf64; exit } }
 +/\114\001/{ if (NR == 1) { print pecoff; exit } }
 
 That works for 32-bit, but I think not for 64-bit.  For 64-bit I would
 expect \144\206.

Fixed.

 +#include windows.h
 
 Where is windows.h going to come from when building a
 cross-compiler?  I think this needs to be removed.  I see that you
 define the structs yourself, as you should, so why do you need
 windows.h?

Indeed, windows.h is not needed, so I have removed it.

 +/* Read a potentially unaligned 2 byte word at P, using native endianness.  
 */
 
 Is there really ever a case where a 2 byte word might be misaligned?

Good remark.  I have changed the comment.

 +/* Return true iff SYM is a defined symbol for a function.  Data symbols
 +   are discarded because they aren't easily identified.  */
 +
 +static int
 +coff_is_symbol (const b_coff_internal_symbol *isym)
 +{
 +  return isym-type == 0x20  isym-sec  0;
 +}
 
 Is this really right?  This seems to test for DT_FCN set, but won't a
 function returning, say, int, have type 0x24 (DT_FCN  N_TBSHFT) | T_INT?

According to MS doc, only 0x20 or 0x00 is used.  But I have changed the doc
for clarity.

 Also, the name isn't right--this is coff_is_function_symbol.

Changed.

 
 +  if (coff_expand_symbol (isym, asym, sects_num, strtab, strtab_size) 
  0)
 +   {
 + error_callback (data, invalid coff symbol, 0);
 + return 0;
 +   }
 
 That's not a very useful error message--can you be more specific?

It is now more specific (although such an error should never happen).

 +  /* Allocate memory for symbols are strings.  */
 
 Comment looks wrong--omit "are"?

Yes.

Here is the new version of the patch.

Regards,
Tristan.

libbacktrace/
2015-05-21  Tristan Gingold  ging...@adacore.com

* pecoff.c: New file.
* Makefile.am (FORMAT_FILES): Add pecoff.c and dependencies.
* Makefile.in: Regenerate.
* filetype.awk: Detect pecoff.
* configure.ac: Define BACKTRACE_SUPPORTS_DATA on elf platforms.
Add pecoff.
* btest.c (test5): Test enabled only if BACKTRACE_SUPPORTS_DATA is
true.
* backtrace-supported.h.in (BACKTRACE_SUPPORTS_DATA): Define.
* configure: Regenerate.
* pecoff.c: New file.

commit fe0f364bf5836dea2aacb6d963c782d12c4d5561
Author: Tristan Gingold ging...@adacore.com
Date:   Thu May 21 14:29:44 2015 +0200

Add support for PE/COFF to libbacktrace.

diff --git a/libbacktrace/ChangeLog b/libbacktrace/ChangeLog
index c6604d9..139521a 100644
--- a/libbacktrace/ChangeLog
+++ b/libbacktrace/ChangeLog
@@ -1,3 +1,17 @@
+2015-05-21  Tristan Gingold  ging...@adacore.com
+
+   * pecoff.c: New file.
+   * Makefile.am (FORMAT_FILES): Add pecoff.c and dependencies.
+   * Makefile.in: Regenerate.
+   * filetype.awk: Detect pecoff.
+   * configure.ac: Define BACKTRACE_SUPPORTS_DATA on elf platforms.
+   Add pecoff.
+   * btest.c (test5): Test enabled only if BACKTRACE_SUPPORTS_DATA is
+   true.
+   * backtrace-supported.h.in (BACKTRACE_SUPPORTS_DATA): Define.
+   * configure: Regenerate.
+   * pecoff.c: New file.
+
 2015-05-13  Michael Haubenwallner  michael.haubenwall...@ssi-schaefer.com
 
* Makefile.in: Regenerated with automake-1.11.6.
diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index a93b82a..c5f0dcb 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -56,6 +56,7 @@ BACKTRACE_FILES = \
 
 FORMAT_FILES = \
elf.c \
+   pecoff.c \
unknown.c
 
 VIEW_FILES = \
@@ -124,6 +125,7 @@ fileline.lo: config.h backtrace.h internal.h
 mmap.lo: config.h backtrace.h internal.h
 mmapio.lo: config.h backtrace.h internal.h
 nounwind.lo: config.h internal.h
+pecoff.lo: config.h backtrace.h internal.h
 posix.lo: config.h backtrace.h internal.h

Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Alan Lawrence


Christophe Lyon wrote:

On 26 May 2015 at 18:25, Alan Lawrence alan.lawre...@arm.com wrote:

I don't see this symptom - I am able to execute such subsets with either my,
or Sandra's, advsimd-intrinsics.exp.


I didn't try to run with your patch, I thought it was an oversight of yours.

Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.


Is it that you have to check runtest_file_p because you are setting
gcc_parallel_test_enable to 0?

I'm doing more testing now, but I think I can drop my advsimd-intrinsics.exp
changes altogether; I'll post an updated patch series shortly.

In the meantime I'm curious as to why you found the gcc_parallel_test_enable
necessary? (And is it safe to reset it to 1 afterwards, rather than to a
saved value?)

See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html


So after working through the differences between Sandra's and my patch, I find 
the existing advsimd-intrinsics.exp achieves pretty much the same thing, and 
preserves the same list of test variants (e.g. the -Og -g from 
set-torture-options which I had removed).


However, I've tried testing advsimd-intrinsics.exp (both the whole thing, and 
individual tests using RUNTESTFLAGS) with and without this hunk:


@@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTI
 set additional_flags [add_options_for_arm_neon ]

 # Main loop.
-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
-# If we're only testing specific files and this isn't one of them, skip it.
-if ![runtest_file_p $runtests $src] then {
-   continue
-}
-
-# runtest_file_p is already run above, and the code below can run
-# runtest_file_p again, make sure everything for this test is
-# performed if the above runtest_file_p decided this runtest
-# instance should execute the test
-gcc_parallel_test_enable 0
-gcc-dg-runtest $src  $additional_flags
-gcc_parallel_test_enable 1
-}
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]]  
${additional_flags}


and find exactly the same tests are run and pass. My hypothesis is thus that you 
only need the explicit loop, manual checking of runtest_file_p, and 
gcc_parallel_test_enable, in order to do *both* c-torture-execute *and* 
gcc-dg-runtest; since we are now only doing the latter, this is unnecessary. 
Does that make sense? (If you agree, I'll propose that as a standalone cleanup 
patch.)


Cheers, Alan

[PATCH] Simple fix to enhance outer-loop vectorization.

2015-05-28 Thread Yuri Rumyantsev

Hi All,

Here is a simple patch which removes restriction on outer-loop
vectorization -  allow references in inner-loop with zero step. This
case was found in one important benchmark.

Bootstrap and regression testing did not show any new failures.
Is it OK for trunk?

ChangeLog:
2015-05-28  Yuri Rumyantsev  ysrum...@gmail.com

* tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
consecutive accesses within outer-loop vectorization for references
with zero step in inner-loop.

gcc/testsuite/ChangeLog:
* gcc.dg/vect/fast-math-vect-outer-1.c: New test.


patch
Description: Binary data

Re: acc_on_device for device_type_host_nonshm

2015-05-28 Thread H.J. Lu

On Thu, May 21, 2015 at 4:10 AM, Jakub Jelinek ja...@redhat.com wrote:
 On Thu, May 21, 2015 at 01:02:12PM +0200, Thomas Schwinge wrote:
 Hi!

 On Thu, 7 May 2015 19:32:26 +0100, Julian Brown jul...@codesourcery.com 
 wrote:
  Here's a new version of the patch [...]

  OK for trunk?

 Makes sense to me (with just a request to drop the testsuite changes, see
 below), to get the existing regressions under control.  Jakub?

 Ok for trunk.

  PR libgomp/65742
 
  gcc/
  * builtins.c (expand_builtin_acc_on_device): Don't use open-coded
  sequence for !ACCEL_COMPILER.
 

It breaks bootstrap on x86:

https://gcc.gnu.org/ml/gcc-regression/2015-05/msg00389.html

I checked in this to fix it.

-- 
H.J.
---
Index: gcc/ChangeLog
===
--- gcc/ChangeLog (revision 223804)
+++ gcc/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2015-05-28  H.J. Lu  hongjiu...@intel.com
+
+ * builtins.c (expand_builtin_acc_on_device): Mark parameters
+ with ATTRIBUTE_UNUSED.
+
 2015-05-28  Julian Brown  jul...@codesourcery.com

  PR libgomp/65742
Index: gcc/builtins.c
===
--- gcc/builtins.c (revision 223804)
+++ gcc/builtins.c (working copy)
@@ -5911,7 +5911,8 @@
acceleration device (ACCEL_COMPILER conditional).  */

 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
+expand_builtin_acc_on_device (tree exp ATTRIBUTE_UNUSED,
+  rtx target ATTRIBUTE_UNUSED)
 {
 #ifdef ACCEL_COMPILER
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))

[patch] libstdc++/65352 fix ubsan errors in std::array<T, 0>

2015-05-28 Thread Jonathan Wakely


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.

Tested powerpc64le-linux, committed to trunk.

commit 0d999cf16b8f6a0d9bbf4bfe96b29e7b73a259e4
Author: Jonathan Wakely jwak...@redhat.com
Date:   Thu May 28 12:21:36 2015 +0100

	PR libstdc++/65352
	* include/std/array (__array_traits::_S_ptr): New function.
	(array::data): Use _S_ptr to avoid creating invalid reference.
	* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust
	dg-error line numbers.
	* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
	likewise.

diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 429506b..24be44f 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -51,6 +51,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   static constexpr _Tp
   _S_ref(const _Type __t, std::size_t __n) noexcept
   { return const_cast_Tp(__t[__n]); }
+
+  static constexpr _Tp*
+  _S_ptr(const _Type __t) noexcept
+  { return const_cast_Tp*(__t); }
 };
 
  templatetypename _Tp
@@ -61,6 +65,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  static constexpr _Tp
  _S_ref(const _Type, std::size_t) noexcept
  { return *static_cast_Tp*(nullptr); }
+
+ static constexpr _Tp*
+ _S_ptr(const _Type) noexcept
+ { return nullptr; }
};
 
   /**
@@ -219,11 +227,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
   pointer
   data() noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 
   const_pointer
   data() const noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 };
 
   // Array comparisons.
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
index 7604412..6830964 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
@@ -28,6 +28,6 @@ int n1 = std::get1(a);
 int n2 = std::get1(std::move(a));
 int n3 = std::get1(ca);
 
-// { dg-error static assertion failed  { target *-*-* } 274 }
-// { dg-error static assertion failed  { target *-*-* } 283 }
+// { dg-error static assertion failed  { target *-*-* } 282 }
 // { dg-error static assertion failed  { target *-*-* } 291 }
+// { dg-error static assertion failed  { target *-*-* } 299 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
index 9788053..5d75366 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
@@ -23,4 +23,4 @@
 
 typedef std::tuple_element1, std::arrayint, 1::type type;
 
-// { dg-error static assertion failed  { target *-*-* } 322 }
+// { dg-error static assertion failed  { target *-*-* } 330 }

[gomp4] Expand OpenACC thread builtins inline

2015-05-28 Thread Julian Brown

For partitioned loops, we're currently calling library functions (in
libgcc) to determine the cardinality of the set of threads a particular
loop is distributed over (given a set of gang/worker/vector toggles),
and the index of the current thread within that set.

This patch reimplements those two functions in terms of the
(PTX-specific!) builtins that Bernd has recently added in order to
implement vector-single/worker-single predication, which expand
directly to machine instructions on the target (or to constant zero/one
on the host). It also makes use of the same gwv bitfields that are set
up by that new code.

The previous BUILT_IN_GOACC_GET_THREAD_NUM and
BUILT_IN_GOACC_GET_NUM_THREADS builtins are removed entirely.

This works reasonably well, but there are some regressions caused by
middle-end optimisers having extra freedom to manipulate the CFG in
ways that PTX cannot support without the optimisation barrier of the
calls to the thread builtins being present. This will be addressed by a
follow-on patch.

Pre-approved for gomp4, but I'll wait for comments on the follow-on
patch before applying so as not to leave the branch in a broken state.

Thanks,

Julian

ChangeLog

gcc/
* builtins.c (expand_oacc_builtin): Return const1_rtx for
ntid/nctaid builtins when the associated patterns are not present.
* omp-builtins.def (BUILT_IN_GOACC_GET_THREAD_NUM)
(BUILT_IN_GOACC_GET_NUM_THREADS): Remove.
* omp-low.c (struct omp_for_data): Remove gang, worker, vector
fields.
(extract_omp_for_data): Don't initialise deleted gang, worker,
vector fields.
(expand_oacc_get_num_threads, expand_oacc_get_thread_num): New
functions.
(lower_reduction_clauses): Use above functions.
(expand_omp_for_static_nochunk): Likewise.
(expand_omp_for_static_chunk): Likewise.
commit 1be8ada44a9f91d2eba16ef1f81243707647f237
Author: Julian Brown jul...@codesourcery.com
Date:   Fri May 15 03:20:42 2015 -0700

Inlined OpenACC thread builtins.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index ebd4b4a..cd51821 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5964,8 +5964,8 @@ expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
 case BUILT_IN_GOACC_NTID:
 #ifdef HAVE_oacc_ntid
   icode = CODE_FOR_oacc_ntid;
-  result = const1_rtx;
 #endif
+  result = const1_rtx;
   break;
 case BUILT_IN_GOACC_TID:
 #ifdef HAVE_oacc_tid
@@ -5975,8 +5975,8 @@ expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
 case BUILT_IN_GOACC_NCTAID:
 #ifdef HAVE_oacc_nctaid
   icode = CODE_FOR_oacc_nctaid;
-  result = const1_rtx;
 #endif
+  result = const1_rtx;
   break;
 case BUILT_IN_GOACC_CTAID:
 #ifdef HAVE_oacc_ctaid
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index ac1f802..47d9e45 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -69,10 +69,6 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NCTAID, GOACC_nctaid,
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_CTAID, GOACC_ctaid,
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, GOACC_get_thread_num,
-		   BT_FN_INT_INT_INT_INT, ATTR_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, GOACC_get_num_threads,
-		   BT_FN_INT_INT_INT_INT, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, GOACC_get_ganglocal_ptr,
 		   BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, GOACC_deviceptr,
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b114887..f82247b 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -263,7 +263,6 @@ struct omp_for_data
   tree chunk_size;
   gomp_for *for_stmt;
   tree pre, iter_type;
-  tree gang, worker, vector;
   int collapse;
   bool have_nowait, have_ordered;
   enum omp_clause_schedule_kind sched_kind;
@@ -749,16 +748,6 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
   gcc_assert (fd-chunk_size == NULL_TREE);
   fd-chunk_size = build_int_cst (TREE_TYPE (fd-loop.v), 1);
 }
-
-  /* Extract the OpenACC gang, worker and vector clauses.  */
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_GANG);
-  fd-gang = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
-
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_WORKER);
-  fd-worker = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
-
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_VECTOR);
-  fd-vector = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
 }
 
 
@@ -4919,6 +4908,159 @@ is_atomic_compatible_reduction (tree var, omp_context *ctx)
   return true;
 }
 
+
+/* Find the total number of threads used by a region partitioned by
+   GWV_BITS.  Setup code required for the calculation is added to SEQ.  Note
+   that this is currently used from both OMP-lowering and OMP-expansion phases,
+   and uses

Re: [patch] testsuite enable PIE tests on FreeBSD

2015-05-28 Thread Thomas Schwinge

Hi!

On Wed, 20 May 2015 14:30:38 -0600, Jeff Law l...@redhat.com wrote:
 On 05/20/2015 11:04 AM, Andreas Tobler wrote:
  the attached patch enables some PIE tests on FreeBSD.

 Wouldn't it be better to remove the target selector and instead add:
 
 /* { dg-require-effective-target pie } */
 
 In each of those tests?

(Got committed to trunk in r223498.)  Thanks!  I wanted to suggest
something along the same lines, because:

 While the net effect is the same today, it means there's only one place 
 to change if another x86 target gains PIE support in the future.

GNU Hurd got it, too.  :-)


Grüße,
 Thomas


signature.asc
Description: PGP signature

Re: [Patch]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold


 On 27 May 2015, at 15:36, Jeff Law l...@redhat.com wrote:

 +static int
 +coff_is_symbol (const b_coff_internal_symbol *isym)
 +{
 +  return isym-type == 0x20  isym-sec  0;
 +}
 You probably want const or enum so that you can have a symbolic name rather 
 than 0x20 here.  It also seems like the name ought to better indicate it's 
 testing for function symbols.

Yes, this is now changed.

 It's a given  that you know COFF specifics better than I ever did, so I'm 
 comfortable assuming you got the COFF specifics right.
 
 The overall structure of elf.c & coff.c is the same with code templates that 
 are very similar, except they work on different underlying types.  Presumably 
 there wasn't a good way to factor any of the generic looking bits out?  And 
 no, I'm not requesting you rewrite all this in BFD :-)

The dummy callback could indeed be easily shared.  For the rest, that’s 
not so simple given the types.  Maybe we can create a ‘C class’ for symbol 
infos.

Tristan.

Re: [PATCH/RFC] Make loop-header-copying more aggressive, rerun before tree-if-conversion

2015-05-28 Thread Richard Biener

On Fri, May 22, 2015 at 5:42 PM, Alan Lawrence alan.lawre...@arm.com wrote:
 This example which I wrote to test ifconversion, currently fails to
 if-convert or vectorize:

 int foo ()
 {
   for (int i = 0; i  32 ; i++)
 {
   int m = (a[i]  i) ? 5 : 4;
   b[i] = a[i] * m;
 }
 }

 ...because jump-threading in dom1 rearranged the loop into a form that
 neither if-conversion nor vectorization would attempt. Discussion at
 https://gcc.gnu.org/ml/gcc/2015-04/msg00343.html led to the suggestion that
 I should rerun loop-header copying (an earlier attempt to fix ifconversion,
 https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01743.html, still did not
 enable vectorization.)

 This patch does so (and makes it slightly less conservative, to tackle the
 example above). I found I had to make this a separate pass, so that the phi
 nodes were cleaned up at the end of the pass before running
 tree_if_conversion. Also at this stage in the compiler (inside loop opts) it
 was not possible to run loop_optimizer_init+finalize, or other
 loop_optimizer data structures needed later would be deleted; hence, I have
 two nearly-but-not-quite-identical passes, the new ch_vect avoiding the
 init/finalize. I tried to tackle this with some C++ subclassing, which
 removes the duplication, but the result feels a little ugly; suggestions for
 any neater approach welcome.

 This patch causes failure of the scan-tree-dump of dom2 in
 gcc.dg/ssa/pr21417.c. This looks for jump-threading to perform an
 optimization, but no longer finds the expected line in the log - as the
 loop-header-copying phase has already done an equivalent transformation
 *before* dom2. The final CFG is thus in the desired form, but I'm not sure
 how to determine this (scanning the CFG itself is very difficult, well
 beyond what we can do with regex, requiring looking at multiple lines and
 basic blocks). Can anyone advise? [The test issue can be worked around by
 preserving the old do_while_p logic for the first header-copying pass, and
 using the new logic only for the second, but this is more awkward inside the
 compiler, which feels wrong.]

 Besides the new vect-ifcvt-11.c, the testsuite actually has a couple of
 other examples where this patch enables (undesired!) vectorization. I've
 dealt with these, but for the record:
 * gcc.dg/vect/slp-perm-7.c: the initialization loop in main,
 contained a check that input[i] > 200; this was already optimized out
 (because input[i] was set to i%256, where i<N with N #defined to 16), but
 that loop was not vectorized because:
 /work/alalaw01/oban/srcfsf/gcc/gcc/testsuite/gcc.dg/vect/slp-perm-7.c:54:3:
 note: not vectorized: latch block not empty.
 /work/alalaw01/oban/srcfsf/gcc/gcc/testsuite/gcc.dg/vect/slp-perm-7.c:54:3:
 note: bad loop form.

 * gcc.dg/vect/vect-strided-a-u16-i4.c: the main1() function has
 three loops; the first (initialization) has an 'if (y) abort() /* Avoid
 vectorization.  */'. However, the 'volatile int y = 0' this was meant to
 reference, is actually shadowed by a local non-volatile; the test is thus
 peeled off and absent from the body of the loop. The loop only avoided
 vectorization because of non-empty latch and bad loop form, as previous.

 With this patch, both those loops now have good form, hence I have fixed
 both to check a global volatile to prevent these extraneous parts from being
 vectorized.

 Tested with bootstrap + check-gcc on x86_64 and AArch64 (linux). As noted
 above, this causes a spurious PASS->FAIL of a scan-tree-dump test, which I'm
 unsure how to fix, but no other regressions.

Apart from Jeff's comment - the usual fix for the undesired
vectorization is to put
a __asm__ volatile (""); in the loop.

+  /* If any block in the loop has an exit edge, and code after it, it is
+ not a do-while loop.  */
+  basic_block *body = get_loop_body (loop);
+  for (unsigned i = 0; i  loop-num_nodes; i++)

wouldn't it be easier to verify that the predecessor of the loop latch
contains the (only) loop exit?

Like

   e = single_exit (loop);
   if (!e)
 return true;

   if (single_exit (loop)-pred != single_pred (loop-latch))
 return false;

?  In fact I think that even for multiple exits we want the latch predecessor
to have an exit (though the vectorizer or if-conversion don't deal with that).

Note that single_exit () only works when the loop state has
LOOPS_HAVE_RECORDED_EXITS
thus it might be easier to simply check

  FOR_EACH_EDGE (... single_pred (loop-latch)-succs ..)
 if (e-dest == loop-latch)
   ;
 else
   break;
  if (!e || !loop_exit_edge_p (loop, e))
return true;

which should work always.

Coding-style wise, can you please move the common pass_ch_vect::execute out
of the pass_ch_vect class?

  unsigned int res = pass_ch_vect::execute (fun);

looks ugly, as well as deriving pass_ch from pass_ch_vect.  I think pass_ch_vect
should be only executed if flag_tree_loop_vectorize is enabled.

   loop_optimizer_init (LOOPS_NORMAL
-

Re: [PATCH] Fix PR66142

2015-05-28 Thread Andreas Schwab

Richard Biener rguent...@suse.de writes:

   * gcc.dg/tree-ssa/ssa-fre-44.c: New testcase.

On ia64:

$ grep -c "return 3.0" ssa-fre-44.c.035t.fre1 
3
$ tail ssa-fre-44.c.035t.fre1
  _19 = _8-u.x;
  _23 = _8-u.y;
  f_24 = _19 + _23;
  bar (p);
  p ={v} {CLOBBER};
  return f_24;

}



Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
And now for something completely different.

Re: [Patch AArch64] PR target/66200 - gcc / libstdc++ TLC for weak memory models.

2015-05-28 Thread James Greenhalgh

On Thu, May 21, 2015 at 09:54:19AM +0100, Ramana Radhakrishnan wrote:
 And here's an additional patch for the testsuite which was missed in the 
 original posting.
 
 This is a testism that's testing code generation as per 
 TARGET_RELAXED_ORDERING being false and therefore needs to be adjusted 
 as attached.
 
 Ramana
 
 PR target/66200
 * g++.dg/abi/aarch64_guard1.C: Adjust testcase.

OK.

Thanks,
James

 diff --git a/gcc/testsuite/g++.dg/abi/aarch64_guard1.C 
 b/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
 index ca1778b..e78f93c 100644
 --- a/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
 +++ b/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
 @@ -13,5 +13,4 @@ int *foo ()
  }
  
  // { dg-final { scan-assembler _ZGVZ3foovE1x,8,8 } }
 -// { dg-final { scan-tree-dump _ZGVZ3foovE1x  1 original } }
  // { dg-final { cleanup-tree-dump original } }

Re: [PATCH] Simple fix to enhance outer-loop vectorization.

2015-05-28 Thread Richard Biener

On Thu, May 28, 2015 at 1:00 PM, Yuri Rumyantsev ysrum...@gmail.com wrote:
 Hi All,

 Here is a simple patch which removes restriction on outer-loop
 vectorization -  allow references in inner-loop with zero step. This
 case was found in one important benchmark.

 Bootstrap and regression testing did not show any new failures.
 Is it OK for trunk.

 ChangeLog:
 2015-05-28  Yuri Rumyantsev  ysrum...@gmail.com

 * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
 consecutive accesses within outer-loop vectorization for references
 with zero step in inner-loop.

 gcc/testsuite/ChangeLog:
 * gcc.dg/vect/fast-math-vect-outer-1.c: New test.

Can you please add a non-omp-simd testcase that triggers this as well and that
is a runtime testcase verifying the transform is correct?

Also please don't add to the strange testcase-name machinery but just
use { dg-additional-options "-ffast-math" }

Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 223653)
+++ tree-vect-data-refs.c   (working copy)
@@ -2261,7 +2261,6 @@
   return true;
 }

-
 /* Analyze the access pattern of the data-reference DR.
In case of non-consecutive accesses call vect_analyze_group_access() to
analyze groups of accesses.  */

spurious white-space change


@@ -2291,14 +2290,8 @@
   if (loop_vinfo  integer_zerop (step))

Surely the comment before this needs updating now.

 {
   GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
-  if (nested_in_vect_loop_p (loop, stmt))
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_NOTE, vect_location,
-zero step in inner loop of nest\n);
- return false;
-   }
-  return DR_IS_READ (dr);
+  if (!nested_in_vect_loop_p (loop, stmt))
+   return DR_IS_READ (dr);
 }

   if (loop  nested_in_vect_loop_p (loop, stmt))

so what happens after the patch?  It would be nice to have a comment
explaining what happens in the nested_in_vect_loop_p case for
the case when the outer-loop step is zero and when it is not zero.

In particular as you don't need any code generation changes - this hints
at that you may miss something ;)

Otherwise of course the patch is ok - lifting restrictions is good.

Thanks,
Richard.

Re: [Patch]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold


 On 28 May 2015, at 02:26, Ian Lance Taylor i...@google.com wrote:

 The #include <windows.h> will break cross-compilers.  It's not OK for
 trunk until that is fixed.

I am confused by this comment, for two reasons:

- I don’t see how that would break cross-compilers.  Cross compilers
 hosted on windows are not impacted by this include when the library is
 used for the tools.  When the backtrace library is used for the target,
 pecoff is not used unless the target is windows.
 So I don’t see a case where the include breaks cross-compilers.

- If the case exists, I don’t see how to implement backtrace within
 shared libraries: I need a windows specific function to get the list
 of DLLs.

Tristan.

Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Christophe Lyon

On 28 May 2015 at 13:32, Christophe Lyon christophe.l...@linaro.org wrote:
 On 28 May 2015 at 12:22, Alan Lawrence alan.lawre...@arm.com wrote:
 Christophe Lyon wrote:

 On 26 May 2015 at 18:25, Alan Lawrence alan.lawre...@arm.com wrote:

 I don't see this symptom - I am able to execute such subsets with either
 my,
 or Sandra's, advsimd-intrinsics.exp.


 I didn't try to run with your patch, I thought it was an oversight of
 yours.

 Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.

 Is it that you have to check runtest_file_p because you are setting
 gcc_parallel_test_enable to 0?

 I'm doing more testing now, but I think I can drop my
 advsimd-intrinsics.exp
 changes altogether; I'll post an updated patch series shortly.

 In the meantime I'm curious as to why you found the
 gcc_parallel_test_enable
 necessary? (And is it safe to reset it to 1 afterwards, rather than to a
 saved value?)

 See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html


 So after working through the differences between Sandra's and my patch, I
 find the existing advsimd-intrinsics.exp achieves pretty much the same
 thing, and preserves the same list of test variants (e.g. the -Og -g from
 set-torture-options which I had removed).

 However, I've tried testing advsimd-intrinsics.exp (both the whole thing,
 and individual tests using RUNTESTFLAGS) with and without this hunk:

 @@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
 $LTO_TORTURE_OPTI
  set additional_flags [add_options_for_arm_neon ]

  # Main loop.
 -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
 -# If we're only testing specific files and this isn't one of them, skip
 it.
 -if ![runtest_file_p $runtests $src] then {
 -   continue
 -}
 -
 -# runtest_file_p is already run above, and the code below can run
 -# runtest_file_p again, make sure everything for this test is
 -# performed if the above runtest_file_p decided this runtest
 -# instance should execute the test
 -gcc_parallel_test_enable 0
 -gcc-dg-runtest $src  $additional_flags
 -gcc_parallel_test_enable 1
 -}
 +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] 
 ${additional_flags}

 and find exactly the same tests are run and pass. My hypothesis is thus that
 you only need the explicit loop, manual checking of runtest_file_p, and
 gcc_parallel_test_enable, in order to do *both* c-torture-execute *and*
 gcc-dg-runtest; since we are now only doing the latter, this is unnecessary.
 Does that make sense? (If you agree, I'll propose that as a standalone
 cleanup patch.)


 Indeed I think you are right. Since we no longer call
 c-torture-execute, we no longer need to call runtest_file_p here.
 Having only one remaining call to runtest_file_p in gcc-dg-runtest is
 parallel-safe. Thanks for the cleanup.


So in fact, except for the comment about '-w' it seems your initial
patch was mostly OK, right?

 Christophe.

 Cheers, Alan

Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Martin Liška


On 05/28/2015 01:15 PM, Thomas Schwinge wrote:

Hi!

On Fri, 15 May 2015 16:38:40 +0200, Martin Liška mli...@suse.cz wrote:

Following patch attempts to rewrite memory reports for GCC's internal 
allocations
[...]


(Got committed to trunk in r223748.)


* hash-map-traits.h: New file.


In that one you added a copyright/licensing header, but...


* mem-stats-traits.h: New file.
* mem-stats.h: New file.


... in these two you didn't (but should):


--- /dev/null
+++ b/gcc/hash-map-traits.h
@@ -0,0 +1,104 @@
+/* A hash map traits.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of [...]



--- /dev/null
+++ b/gcc/mem-stats-traits.h
@@ -0,0 +1,20 @@
+#ifndef GCC_MEM_STATS_TRAITS_H
+#define GCC_MEM_STATS_TRAITS_H
+
+/* Memory allocation origin.  */
+enum mem_alloc_origin
+{
+  HASH_TABLE,
+  HASH_MAP,
+  HASH_SET,
+  VEC,
+  BITMAP,
+  GGC,
+  MEM_ALLOC_ORIGIN_LENGTH
+};
+
+/* Verbose names of the memory allocation origin.  */
+static const char * mem_alloc_origin_names[] = { Hash tables, Hash maps, Hash 
sets,
+  Heap vectors, Bitmaps, GGC memory };
+
+#endif // GCC_MEM_STATS_TRAITS_H



--- /dev/null
+++ b/gcc/mem-stats.h
@@ -0,0 +1,535 @@
+#ifndef GCC_MEM_STATS_H
+#define GCC_MEM_STATS_H
+
+#include hash-map-traits.h
+[...]



Grüße,
  Thomas



Hello.

Thank you for pointing out the missing copyright.
Following patch adds that.

Ready for trunk?
Thanks,
Martin
From 2abc4116e3a941dbd8e0cff22698b979c9c1c830 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Thu, 28 May 2015 14:24:58 +0200
Subject: [PATCH] Missing copyright for mem-stats header files.

gcc/ChangeLog:

2015-05-28  Martin Liska  mli...@suse.cz

	* mem-stats-traits.h: Add copyright header.
	* mem-stats.h: Likewise.
---
 gcc/mem-stats-traits.h | 20 
 gcc/mem-stats.h| 20 
 2 files changed, 40 insertions(+)

diff --git a/gcc/mem-stats-traits.h b/gcc/mem-stats-traits.h
index de1614e..c5cb84a 100644
--- a/gcc/mem-stats-traits.h
+++ b/gcc/mem-stats-traits.h
@@ -1,3 +1,23 @@
+/* A memory statistics traits.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Martin Liska  mli...@suse.cz
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */
+
 #ifndef GCC_MEM_STATS_TRAITS_H
 #define GCC_MEM_STATS_TRAITS_H
 
diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h
index ac47231..9e3dec0 100644
--- a/gcc/mem-stats.h
+++ b/gcc/mem-stats.h
@@ -1,3 +1,23 @@
+/* A memory statistics tracking infrastructure.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Martin Liska  mli...@suse.cz
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */
+
 #ifndef GCC_MEM_STATS_H
 #define GCC_MEM_STATS_H
 
-- 
2.1.4

Re: [PATCH v2] Handle OS X deployment targets correctly

2015-05-28 Thread Mike Stump

On May 20, 2015, at 2:57 PM, Lawrence Velázquez v...@larryv.me wrote:
 2015-05-15  Lawrence Velázquez  v...@larryv.me
 
   PR target/63810
   * gcc/config/darwin-c.c (version_components): New global enum.
   (parse_version, version_as_legacy_macro)
   (version_as_modern_macro, macosx_version_as_macro): New functions.
   (version_as_macro): Remove.
   (darwin_cpp_builtins): Use new function.
   * gcc/testsuite/gcc.dg/darwin-minversion-3.c: Update testcase.
   * gcc/testsuite/gcc.dg/darwin-minversion-4.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-5.c: New testcase.
   * gcc/testsuite/gcc.dg/darwin-minversion-6.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-7.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-8.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-9.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-10.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-11.c: Ditto.
   * gcc/testsuite/gcc.dg/darwin-minversion-12.c: Ditto.

Committed revision 223808.

Thanks for all your work.

Re: [patch] Make std::string default constructor conditionally noexcept

2015-05-28 Thread Jonathan Wakely


On 13/05/15 14:36 +0100, Jonathan Wakely wrote:

http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4383.html#2455

Voted into the WP in Lenexa.

We already did the right thing for vector, so only basic_string needs
to change.

Tested powerpc64le-linux, committed to trunk.


Also committed to gcc-5-branch.


commit 634ed6e2d2ea4d69a29a8907044e6f68541d88aa
Author: Jonathan Wakely jwak...@redhat.com
Date:   Wed May 13 14:21:37 2015 +0100

* include/bits/basic_string.h (basic_string::basic_string()): Make
noexcept conditional on allocator (LWG 2455).

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 3e3eef4..093f502 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -377,7 +377,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
  /**
   *  @brief  Default constructor creates an empty string.
   */
-  basic_string() _GLIBCXX_NOEXCEPT
+  basic_string()
+#if __cplusplus >= 201103L
+  noexcept(is_nothrow_default_constructible<_Alloc>::value)
+#endif
  : _M_dataplus(_M_local_data())
  { _M_set_length(0); }

Re: [patch] libstdc++/65352 fix ubsan errors in std::array<T, 0>

2015-05-28 Thread Jonathan Wakely


On 28/05/15 12:53 +0100, Jonathan Wakely wrote:

Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.

Tested powerpc64le-linux, committed to trunk.


And gcc-5-branch.


commit 0d999cf16b8f6a0d9bbf4bfe96b29e7b73a259e4
Author: Jonathan Wakely jwak...@redhat.com
Date:   Thu May 28 12:21:36 2015 +0100

PR libstdc++/65352
* include/std/array (__array_traits::_S_ptr): New function.
(array::data): Use _S_ptr to avoid creating invalid reference.
* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust
dg-error line numbers.
* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
likewise.

Re: [patch] Rename template parameter of std::__alloc_rebind

2015-05-28 Thread Jonathan Wakely


On 01/05/15 16:23 +0100, Jonathan Wakely wrote:

The name of this alias template's parameter is a copy-and-paste error; it
should be _Alloc. Tested powerpc64le-linux, committed to trunk.


Also committed to gcc-5-branch.


commit a772309ec9fc300e57edd750fa32b8320d68004a
Author: Jonathan Wakely jwak...@redhat.com
Date:   Fri May 1 16:12:16 2015 +0100

* include/bits/alloc_traits.h (__alloc_rebind): Change parameter name.

diff --git a/libstdc++-v3/include/bits/alloc_traits.h 
b/libstdc++-v3/include/bits/alloc_traits.h
index d6c42ec..12c6c12 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -72,8 +72,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  typedef _Alloc_Tp, _Args... __type;
};

-  template<typename _Ptr, typename _Tp>
-using __alloc_rebind = typename __alloctr_rebind<_Ptr, _Tp>::__type;
+  template<typename _Alloc, typename _Tp>
+using __alloc_rebind = typename __alloctr_rebind<_Alloc, _Tp>::__type;

  /**
   * @brief  Uniform interface to all allocator types.

Re: acc_on_device for device_type_host_nonshm

2015-05-28 Thread Julian Brown

On Thu, 28 May 2015 04:48:58 -0700
H.J. Lu hjl.to...@gmail.com wrote:

 On Thu, May 21, 2015 at 4:10 AM, Jakub Jelinek ja...@redhat.com
 wrote:
  On Thu, May 21, 2015 at 01:02:12PM +0200, Thomas Schwinge wrote:
  Hi!
 
  On Thu, 7 May 2015 19:32:26 +0100, Julian Brown
  jul...@codesourcery.com wrote:
   Here's a new version of the patch [...]
 
   OK for trunk?
 
  Makes sense to me (with just a request to drop the testsuite
  changes, see below), to get the existing regressions under
  control.  Jakub?
 
  Ok for trunk.
 
   PR libgomp/65742
  
   gcc/
   * builtins.c (expand_builtin_acc_on_device): Don't use
   open-coded sequence for !ACCEL_COMPILER.
  
 
 It breaks bootstrap on x86:
 
 https://gcc.gnu.org/ml/gcc-regression/2015-05/msg00389.html
 
 I checked in this to fix it.

Apologies, and thanks!

Julian

Re: [PATCH] Optimize (CST1 A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Jakub Jelinek

On Thu, May 28, 2015 at 02:15:45PM +0200, Marek Polacek wrote:
 This PR points out that we weren't able to optimize 1 << x == 2 to just
 x == 1.  This is my attempt to fix that: if we see (CST1 << A) == CST2
 and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
 only if the result of the shift is a natural number (including zero).

Is CST2 a multiple of CST1 the best test though?
I mean say in
(0x8001U << x) == 0x20000U
0x20000U isn't a multiple of 0x8001U, yet there is only one
valid value of x for which it holds (17), so we could very well
optimize that to x == 17.
If popcount of the CST1 is 1, then multiple_of_p is supposedly sufficient
(have you checked if CST1 is negative that it still works?), for others
supposedly we could have a helper function that would just try
in a loop all shift counts from 0 to precision - 1, and note when
(CST1 << b) == CST2 - if for no b, then it should fold regardless of
has_single_use to false or true, if for exactly one shift count, then
use a comparison against that shift count, otherwise give up?
Supposedly (CST1 >> A) == CST2 can be handled similarly.

 If CST2 is not a multiple of CST1, then the whole expression can be
 discarded, but I'd like to do that as a follow-up.
 (It would help if our current match.pd grammar allowed us to use else,
 any plans on doing that?)

Jakub

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-05-28 Thread Martin Liška


On 05/27/2015 07:44 PM, Jeff Law wrote:

On 05/27/2015 07:56 AM, mliska wrote:

Hello.

The following patch set attempts to replace the old-style pool allocator
with a type-based one. Moreover, as we utilize classes and structs that are used
just by a pool allocator, these types have overwritten ctors and dtors.
Thus, using the allocator is much easier and we no longer need to cast types
back and forth. Another benefit can be achieved in the future, as we will
be able to call class constructors to correctly register the location
where memory is allocated (-fgather-detailed-mem-stats).

The patch bootstraps on x86_64-linux-gnu and ppc64-linux-gnu and
survives regression tests on x86_64-linux-gnu.

Ready for trunk?
Thanks,
Martin

gcc/ChangeLog:

2015-04-30  Martin Liska  mli...@suse.cz

* alloc-pool.c (struct alloc_pool_descriptor): Move definition
to header file.
* alloc-pool.h (pool_allocator::pool_allocator): New function.
(pool_allocator::release): Likewise.
(inline pool_allocator::release_if_empty): Likewise.
(inline pool_allocator::~pool_allocator): Likewise.
(pool_allocator::allocate): Likewise.
(pool_allocator::remove): Likewise.

So on a general note, I don't like changing the size of the structure based on 
ENABLE_CHECKING.  If we've got other cases where we do this, then I guess it's 
OK, but if not, I'd prefer not to start doing so.


Hello.

This mechanism has just been adapted. I find it quite useful, as we have
examples in the source code where we
allocate the same struct/class types from various pools. For debugging purposes, it
helps to identify whether the
release operation is called on the correct pool.





---



+
+  /* Align X to 8.  */
+  size_t align_eight (size_t x)
+  {
+    return (((x+7) >> 3) << 3);
+  }
+
+  const char *m_name;
+#ifdef ENABLE_CHECKING
+  ALLOC_POOL_ID_TYPE m_id;
+#endif
+  size_t m_elts_per_block;
+
+  /* These are the elements that have been allocated at least once and freed.  
*/
+  allocation_pool_list *m_returned_free_list;
+
+  /* These are the elements that have not yet been allocated out of
+ the last block obtained from XNEWVEC.  */
+  char* m_virgin_free_list;
+
+  /* The number of elements in the virgin_free_list that can be
+ allocated before needing another block.  */
+  size_t m_virgin_elts_remaining;
+  size_t m_elts_allocated;
+  size_t m_elts_free;
+  size_t m_blocks_allocated;
+  allocation_pool_list *m_block_list;
+  size_t m_block_size;
+  size_t m_elt_size;

Several fields aren't documented.  They're largely self-explanatory, so I won't 
insist you document those trailing fields.  Your call whether or not to add 
docs for them.


Ok, even though they are self-explanatory, I'm going to document these fields.





+
+  /* Now align the size to a multiple of 4.  */
+  size = align_eight (size);

Why not just aligned to 4, rather than a multiple of 4?  Presumably the extra 4 
bytes don't matter in practice?


Also adapted constant, hope it's chosen as the best.




+
+template <typename T>
+void
+inline pool_allocator<T>::release_if_empty ()
+{
+  if (m_elts_free == m_elts_allocated)
+release ();
+}

Is the release_if_empty all that useful in practice?


Yes, 02/x uses that feature.



So the big issue in my mind continues to be the additional element in the 
structure when ENABLE_CHECKING is on.  As mentioned earlier, if we're already 
doing this elsewhere, then I won't object.  If we aren't, then I don't want to 
start doing so now.

The rest of the stuff are just minor questions, but nothing which would in my 
mind stop this from going forward.

Presumably your testing was with the whole series and they can't go in 
piecemeal, right?


Right, regression tests were run just once for the whole series, but I've 
tested that every individual patch can be applied and the compiler can be 
successfully built.
Anyway, I would like to commit all these patches at once (one by one).
Thus, I'm going to wait for approval for the whole series before I'll commit 
the set.

Thanks,
Martin




jeff

RE: [Patch MIPS] Enable TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook

2015-05-28 Thread Robert Suchanek

Hi Matthew,

  +
  +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
  +
  +static reg_class_t
  +mips_ira_change_pseudo_allocno_class (int regno, reg_class_t
  +allocno_class) {
  +  if (FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) || allocno_class !=
  ALL_REGS)
  +return allocno_class;
  +  return GR_REGS;
  +}
  +
 
 I'm concerned that this may not be the right condition but either way,
 I think it is better to switch this around to have the special case
 as the conditional. I found it difficult to understand what it is
 doing even when I know the intent :-) A comment about the purpose seems
 appropriate too here as it won't be obvious to someone new.

I tried to write a sensible comment and found the original change hard 
to describe.  I changed the condition to the special case and did some
experiments.  The patch below is now more concise, better fits the purpose and 
it
seems to have marginally better allocation too.

 Aren't there some fixed point modes that should go in FPRs too? I guess
 paired single (v2sf) doesn't need mentioning as it would never be
 allowed in GR_REGS so pseudos of that mode would never get ALL_REGS,
 is that correct? I.e. will we only see ALL_REGS if a particular
 pseudo/mode truly can be placed in any register according to the
 hard_regno_ok rules?

I think that with the patch below all concerns would be addressed since
the class narrowing would be constrained to integers rather than anything else.

Regards,
Robert 

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index c3755f5..976f844 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -19415,6 +19415,21 @@ mips_lra_p (void)
 {
   return mips_lra_flag;
 }
+
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
+
+static reg_class_t
+mips_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class)
+{
+  /* LRA will generate unnecessary reloads because the LRA's cost pass finds
+ cheaper to move data to/from memory into FP regs rather than GP regs.
+ By narrowing the class for allocnos to GR_REGS for integral modes early,
+ we refrain from using FP regs until they are absolutely necessary.  */
+  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS)
+return GR_REGS;
+  return allocno_class;
+}
+
 

 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
@@ -19671,6 +19686,8 @@ mips_lra_p (void)
 #define TARGET_SPILL_CLASS mips_spill_class
 #undef TARGET_LRA_P
 #define TARGET_LRA_P mips_lra_p
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS 
mips_ira_change_pseudo_allocno_class
 
 struct gcc_target targetm = TARGET_INITIALIZER;

[PATCH][5/n] Reduction vectorization improvements

2015-05-28 Thread Richard Biener


The following patch fixes loop vectorization of SLP reduction chains
that involve patterns.  The issue here is that pattern recog runs
after reduction detection and this screws things up.  Re-ordering
this created interesting side-effects so I didn't explore this
further (for now) but instead fix the detected reduction chains
after pattern recog.  This of course just reveals multiple places
where things go wrong with this setting, fixed with the following
patch which finally vectorizes one of the hottest loop nests
in a soon popular x264 encoder/decoder.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-28  Richard Biener  rguent...@suse.de

* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
(vect_fixup_scalar_cycles_with_patterns): Likewise.
(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
after pattern recog.
(vect_create_epilog_for_reduction): Properly handle reductions
with patterns.
(vectorizable_reduction): Likewise.
* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
reduction chains.
(vect_get_constant_vectors): Create the correct number of
initial values for reductions.
(vect_schedule_slp_instance): Handle reduction chains that are
type changing properly.
* tree-vect-stmts.c (vect_analyze_stmt): Adjust.

* gcc.dg/vect/slp-reduc-sad.c: New testcase.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223814)
+++ gcc/tree-vect-loop.c(working copy)
@@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_inf
 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+/* Transfer group and reduction information from STMT to its pattern stmt.  */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+  gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  gimple stmtp;
+  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
+  do
+{
+  stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+  stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+  if (stmt)
+   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+}
+  while (stmt);
+  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
+
+/* Fixup scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+  gimple first;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+  {
+   vect_fixup_reduc_chain (first);
+   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+  }
+}
 
 /* Function vect_get_loop_niters.
 
@@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_
 
   vect_pattern_recog (loop_vinfo, NULL);
 
+  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
   /* Analyze the access patterns of the data-refs in the loop (consecutive,
  complex, etc.). FORNOW: Only handle consecutive access pattern.  */
 
@@ -4573,8 +4614,12 @@ vect_finalize_reduction:
  exit phi node.  */
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
 {
-  scalar_dest = gimple_assign_lhs (
-   SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+  gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+  /* Handle reduction patterns.  */
+  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+   dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+
+  scalar_dest = gimple_assign_lhs (dest_stmt);
   group_size = 1;
 }
 
@@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gim
   auto_vec<gimple> phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
+  bool first_p = true;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
  we don't update STMT_INFO, since only the last stmt is marked as reduction
  and has reduction properties.  */
-  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+      && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+{
+  stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  first_p = false;
+}
 
   if (nested_in_vect_loop_p (loop, stmt))
 {
@@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gim
 return false;
 
   /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (stmt_info) !=

Re: [PATCH] Optimize (CST1 A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Richard Biener

On Thu, May 28, 2015 at 2:15 PM, Marek Polacek pola...@redhat.com wrote:
 This PR points out that we weren't able to optimize 1  x == 2 to just
 x == 1.  This is my attempt to fix that: if we see (CST1  A) == CST2
 and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
 only if the result of the shift is a natural number (including zero).

 If CST2 is not a multiple of CST1, then the whole expression can be
 discarded, but I'd like to do that as a follow-up.
 (It would help if our current match.pd grammar allowed us to use else,
 any plans on doing that?)

 Bootstrapped/regtested on x86_64-linux, ok for trunk?

 2015-05-28  Marek Polacek  pola...@redhat.com

 PR tree-optimization/66299
 * match.pd ((CST1  A) == CST2 - A == log2 (CST2 / CST1),
 (CST1  A) != CST2 - A != log2 (CST2 / CST1)): New
 patterns.

 * gcc.dg/pr66299-1.c: New test.
 * gcc.dg/pr66299-2.c: New test.

 diff --git gcc/match.pd gcc/match.pd
 index abd7851..5d07a70 100644
 --- gcc/match.pd
 +++ gcc/match.pd
 @@ -676,6 +676,19 @@ along with GCC; see the file COPYING3.  If not see
(cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
(icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))

 +/* (CST1  A) == CST2 - A == log2 (CST2 / CST1)
 +   (CST1  A) != CST2 - A != log2 (CST2 / CST1)
 +   if CST2 is a multiple of CST1.  */
 +(for cmp (ne eq)
 + (simplify
 +  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
 +  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))

I think we have the single_use (@3) helper now.  Not sure why you
restrict this here though - we are only creating new constants.

Ok with dropping the single-use check.

 +wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))
 +   (with {
 +int shift = wi::exact_log2 (wi::div_trunc (@2, @0, TYPE_SIGN (type))); }
 +   (if (shift != -1)
 +(cmp @1 { build_int_cst (TREE_TYPE (@1), shift); }))

so with else you mean

(if (shift != -1)
  ...
  (else-expr ...))

?  Sure that's possible.  Today you can write

   (if (shift != -1)
  ...)
   (if (shift == -1)
 ...)

or

   (if (shift != -1)
 )
   (else-expr)

which is equivalent if the if is the only one at the nesting level and
the then-expr
doesn't contain any further ones.  That is, it is equivalent to

 if () ...;
 else-expr;

thus the fall-thru

So (if ...) would get an optional else-expr, yes, that sounds useful.
I think we
already have some (if A ..) (if !A ..) in match.pd.

Thanks,
Richard.

 +
  /* Simplifications of conversions.  */

  /* Basic strip-useless-type-conversions / strip_nops.  */
 diff --git gcc/testsuite/gcc.dg/pr66299-1.c gcc/testsuite/gcc.dg/pr66299-1.c
 index e69de29..9d41275 100644
 --- gcc/testsuite/gcc.dg/pr66299-1.c
 +++ gcc/testsuite/gcc.dg/pr66299-1.c
 @@ -0,0 +1,83 @@
 +/* PR tree-optimization/66299 */
 +/* { dg-do run } */
 +/* { dg-options -fdump-tree-original } */
 +
 +void
 +test1 (int x)
 +{
 +  if ((0  x) != 0
 +  || (1  x) != 2
 +  || (2  x) != 4
 +  || (3  x) != 6
 +  || (4  x) != 8
 +  || (5  x) != 10
 +  || (6  x) != 12
 +  || (7  x) != 14
 +  || (8  x) != 16
 +  || (9  x) != 18
 +  || (10  x) != 20)
 +__builtin_abort ();
 +}
 +
 +void
 +test2 (int x)
 +{
 +  if (!((0  x) == 0
 + (1  x) == 4
 + (2  x) == 8
 + (3  x) == 12
 + (4  x) == 16
 + (5  x) == 20
 + (6  x) == 24
 + (7  x) == 28
 + (8  x) == 32
 + (9  x) == 36
 +(10  x) == 40))
 +__builtin_abort ();
 +}
 +
 +void
 +test3 (unsigned int x)
 +{
 +  if ((0U  x) != 0U
 +  || (1U  x) != 16U
 +  || (2U  x) != 32U
 +  || (3U  x) != 48U
 +  || (4U  x) != 64U
 +  || (5U  x) != 80U
 +  || (6U  x) != 96U
 +  || (7U  x) != 112U
 +  || (8U  x) != 128U
 +  || (9U  x) != 144U
 +  || (10U  x) != 160U)
 +__builtin_abort ();
 +}
 +
 +void
 +test4 (unsigned int x)
 +{
 +  if (!((0U  x) == 0U
 +   || (1U  x) == 8U
 +   || (2U  x) == 16U
 +   || (3U  x) == 24U
 +   || (4U  x) == 32U
 +   || (5U  x) == 40U
 +   || (6U  x) == 48U
 +   || (7U  x) == 56U
 +   || (8U  x) == 64U
 +   || (9U  x) == 72U
 +   || (10U  x) == 80U))
 +__builtin_abort ();
 +}
 +
 +int
 +main (void)
 +{
 +  test1 (1);
 +  test2 (2);
 +  test3 (4U);
 +  test4 (3U);
 +}
 +
 +/* { dg-final { scan-tree-dump-not  original } } */
 +/* { dg-final { cleanup-tree-dump original } } */
 diff --git gcc/testsuite/gcc.dg/pr66299-2.c gcc/testsuite/gcc.dg/pr66299-2.c
 index e69de29..dde0549 100644
 --- gcc/testsuite/gcc.dg/pr66299-2.c
 +++ gcc/testsuite/gcc.dg/pr66299-2.c
 @@ -0,0 +1,34 @@
 +/* PR tree-optimization/66299 */
 +/* { dg-do run } */
 +/* { dg-options -fdump-tree-optimized -O } */
 +
 +void
 +test1 (int x, unsigned u)
 +{
 +  if ((1U  x) != 64
 +  || (2  x) != u
 +  || (x  x) != 384
 +  || (3  x) == 9
 +

[Ada] Avoid use of secondary stack

2015-05-28 Thread Arnaud Charlet

This patch avoids the use of the secondary stack, and the corresponding cleanup
handlers, in many cases. For example, access discriminants no longer force
functions to return on the secondary stack. This is a speed improvement.
It is particularly relevant to the Ada.Containers.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-28  Bob Duff  d...@adacore.com

* sem_util.adb (Requires_Transient_Scope): Avoid returning
function results on the secondary stack in so many cases.

Index: sem_util.adb
===
--- sem_util.adb(revision 223813)
+++ sem_util.adb(working copy)
@@ -16951,14 +16951,50 @@
--
 
--  A transient scope is required when variable-sized temporaries are
-   --  allocated in the primary or secondary stack, or when finalization
-   --  actions must be generated before the next instruction.
+   --  allocated on the secondary stack, or when finalization actions must be
+   --  generated before the next instruction.
 
+   function Old_Requires_Transient_Scope (Id : Entity_Id) return Boolean;
+   function New_Requires_Transient_Scope (Id : Entity_Id) return Boolean;
+   --  ???We retain the old and new algorithms for Requires_Transient_Scope for
+   --  the time being. New_Requires_Transient_Scope is used by default; the
+   --  debug switch -gnatdQ can be used to do Old_Requires_Transient_Scope
+   --  instead. The intent is to use this temporarily to measure before/after
+   --  efficiency. Note: when this temporary code is removed, the documentation
+   --  of dQ in debug.adb should be removed.
+
function Requires_Transient_Scope (Id : Entity_Id) return Boolean is
+  Old_Result : constant Boolean := Old_Requires_Transient_Scope (Id);
+
+   begin
+  if Debug_Flag_QQ then
+ return Old_Result;
+  end if;
+
+  declare
+ New_Result : constant Boolean := New_Requires_Transient_Scope (Id);
+
+  begin
+ --  Assert that we're not putting things on the secondary stack if we
+ --  didn't before; we are trying to AVOID secondary stack when
+ --  possible.
+
+ if not Old_Result then
+pragma Assert (not New_Result);
+null;
+ end if;
+
+ return New_Result;
+  end;
+   end Requires_Transient_Scope;
+
+   --
+   -- Old_Requires_Transient_Scope --
+   --
+
+   function Old_Requires_Transient_Scope (Id : Entity_Id) return Boolean is
   Typ : constant Entity_Id := Underlying_Type (Id);
 
-   --  Start of processing for Requires_Transient_Scope
-
begin
   --  This is a private type which is not completed yet. This can only
   --  happen in a default expression (of a formal parameter or of a
@@ -16989,9 +17025,7 @@
   --  returned value is allocated on the secondary stack. Controlled
   --  type temporaries need finalization.
 
-  elsif Is_Tagged_Type (Typ)
-or else Has_Controlled_Component (Typ)
-  then
+  elsif Is_Tagged_Type (Typ) or else Has_Controlled_Component (Typ) then
  return not Is_Value_Type (Typ);
 
   --  Record type
@@ -16999,18 +17033,20 @@
   elsif Is_Record_Type (Typ) then
  declare
 Comp : Entity_Id;
+
  begin
 Comp := First_Entity (Typ);
 while Present (Comp) loop
if Ekind (Comp) = E_Component then
+
   --  ???It's not clear we need a full recursive call to
-  --  Requires_Transient_Scope here. Note that the following
-  --  can't happen.
+  --  Old_Requires_Transient_Scope here. Note that the
+  --  following can't happen.
 
   pragma Assert (Is_Definite_Subtype (Etype (Comp)));
   pragma Assert (not Has_Controlled_Component (Etype (Comp)));
 
-  if Requires_Transient_Scope (Etype (Comp)) then
+  if Old_Requires_Transient_Scope (Etype (Comp)) then
  return True;
   end if;
end if;
@@ -17033,7 +17069,7 @@
 
  --  If component type requires a transient scope, the array does too
 
- if Requires_Transient_Scope (Component_Type (Typ)) then
+ if Old_Requires_Transient_Scope (Component_Type (Typ)) then
 return True;
 
  --  Otherwise, we only need a transient scope if the size depends on
@@ -17049,8 +17085,133 @@
  pragma Assert (Is_Protected_Type (Typ) or else Is_Task_Type (Typ));
  return False;
   end if;
-   end Requires_Transient_Scope;
+   end Old_Requires_Transient_Scope;
 
+   --
+   -- New_Requires_Transient_Scope --
+   --
+
+   function New_Requires_Transient_Scope (Id : Entity_Id) return Boolean is
+
+  function

[PATCH] Optimize (CST1 A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Marek Polacek

This PR points out that we weren't able to optimize 1 << x == 2 to just
x == 1.  This is my attempt to fix that: if we see (CST1 << A) == CST2
and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
only if the result of the shift is a natural number (including zero).

If CST2 is not a multiple of CST1, then the whole expression can be
discarded, but I'd like to do that as a follow-up.
(It would help if our current match.pd grammar allowed us to use else,
any plans on doing that?)

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-05-28  Marek Polacek  pola...@redhat.com

PR tree-optimization/66299
* match.pd ((CST1 << A) == CST2 -> A == log2 (CST2 / CST1),
(CST1 << A) != CST2 -> A != log2 (CST2 / CST1)): New
patterns.

* gcc.dg/pr66299-1.c: New test.
* gcc.dg/pr66299-2.c: New test.

diff --git gcc/match.pd gcc/match.pd
index abd7851..5d07a70 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -676,6 +676,19 @@ along with GCC; see the file COPYING3.  If not see
   (cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
   (icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
 
+/* (CST1 << A) == CST2 -> A == log2 (CST2 / CST1)
+   (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)
+   if CST2 is a multiple of CST1.  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
+  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))
+       && wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))
+   (with {
+int shift = wi::exact_log2 (wi::div_trunc (@2, @0, TYPE_SIGN (type))); }
+   (if (shift != -1)
+(cmp @1 { build_int_cst (TREE_TYPE (@1), shift); }))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
diff --git gcc/testsuite/gcc.dg/pr66299-1.c gcc/testsuite/gcc.dg/pr66299-1.c
index e69de29..9d41275 100644
--- gcc/testsuite/gcc.dg/pr66299-1.c
+++ gcc/testsuite/gcc.dg/pr66299-1.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options -fdump-tree-original } */
+
+void
+test1 (int x)
+{
+  if ((0  x) != 0
+  || (1  x) != 2
+  || (2  x) != 4
+  || (3  x) != 6
+  || (4  x) != 8
+  || (5  x) != 10
+  || (6  x) != 12
+  || (7  x) != 14
+  || (8  x) != 16
+  || (9  x) != 18
+  || (10  x) != 20)
+__builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  if (!((0  x) == 0
+ (1  x) == 4
+ (2  x) == 8
+ (3  x) == 12
+ (4  x) == 16
+ (5  x) == 20
+ (6  x) == 24
+ (7  x) == 28
+ (8  x) == 32
+ (9  x) == 36
+(10  x) == 40))
+__builtin_abort ();
+}
+
+void
+test3 (unsigned int x)
+{
+  if ((0U  x) != 0U
+  || (1U  x) != 16U
+  || (2U  x) != 32U
+  || (3U  x) != 48U
+  || (4U  x) != 64U
+  || (5U  x) != 80U
+  || (6U  x) != 96U
+  || (7U  x) != 112U
+  || (8U  x) != 128U
+  || (9U  x) != 144U
+  || (10U  x) != 160U)
+__builtin_abort ();
+}
+
+void
+test4 (unsigned int x)
+{
+  if (!((0U  x) == 0U
+   || (1U  x) == 8U
+   || (2U  x) == 16U
+   || (3U  x) == 24U
+   || (4U  x) == 32U
+   || (5U  x) == 40U
+   || (6U  x) == 48U
+   || (7U  x) == 56U
+   || (8U  x) == 64U
+   || (9U  x) == 72U
+   || (10U  x) == 80U))
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (1);
+  test2 (2);
+  test3 (4U);
+  test4 (3U);
+}
+
+/* { dg-final { scan-tree-dump-not  original } } */
+/* { dg-final { cleanup-tree-dump original } } */
diff --git gcc/testsuite/gcc.dg/pr66299-2.c gcc/testsuite/gcc.dg/pr66299-2.c
index e69de29..dde0549 100644
--- gcc/testsuite/gcc.dg/pr66299-2.c
+++ gcc/testsuite/gcc.dg/pr66299-2.c
@@ -0,0 +1,34 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options -fdump-tree-optimized -O } */
+
+void
+test1 (int x, unsigned u)
+{
+  if ((1U  x) != 64
+  || (2  x) != u
+  || (x  x) != 384
+  || (3  x) == 9
+  || (x  14) != 98304U
+  || (1  x) == 14
+  || (3  2) != 12)
+__builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  unsigned int t = ((unsigned int) 1U  x);
+  if (t != 2U)
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (6, 128U);
+  test2 (1);
+}
+
+/* { dg-final { scan-tree-dump-not  optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */

Marek

[Ada] Avoid use of secondary stack

2015-05-28 Thread Arnaud Charlet

This patch avoids the use of the secondary stack, and the corresponding cleanup
handlers, in many cases. For example, access discriminants no longer force
functions to return on the secondary stack. This is a speed improvement.
It is particularly relevant to the Ada.Containers.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-28  Bob Duff  d...@adacore.com

* sem_util.adb (Requires_Transient_Scope): For definite untagged
subtypes, we should never have to use the secondary stack. This moves
toward that goal. But there are still cases that don't work.
Here, we move the check for Is_Definite first, but add a
special-purpose check for Has_Discrim_Dep_Array.

Index: sem_util.adb
===
--- sem_util.adb(revision 223814)
+++ sem_util.adb(working copy)
@@ -17103,6 +17103,11 @@
   --  could be nested inside some other record that is constrained by
   --  nondiscriminants). That is, the recursive calls are too conservative.
 
+  function Has_Discrim_Dep_Array (Typ : Entity_Id) return Boolean;
+  --  True if we find certain discriminant-dependent array
+  --  subcomponents. This shouldn't be necessary, but without this check,
+  --  we crash in gimplify. ???
+
   function Caller_Known_Size_Record (Typ : Entity_Id) return Boolean is
  pragma Assert (Typ = Underlying_Type (Typ));
 
@@ -17150,8 +17155,50 @@
  return True;
   end Caller_Known_Size_Record;
 
-  --  Local deeclarations
+  function Has_Discrim_Dep_Array (Typ : Entity_Id) return Boolean is
+ pragma Assert (Typ = Underlying_Type (Typ));
 
+  begin
+ if Is_Array_Type (Typ) then
+return Size_Depends_On_Discriminant (Typ);
+ end if;
+
+ if Is_Record_Type (Typ)
+   or else
+   Is_Protected_Type (Typ)
+ then
+declare
+   Comp : Entity_Id := First_Entity (Typ);
+
+begin
+   while Present (Comp) loop
+
+  --  Only look at E_Component entities. No need to look at
+  --  E_Discriminant entities, and we must ignore internal
+  --  subtypes generated for constrained components.
+
+  if Ekind (Comp) = E_Component then
+ declare
+Comp_Type : constant Entity_Id :=
+  Underlying_Type (Etype (Comp));
+
+ begin
+if Has_Discrim_Dep_Array (Comp_Type) then
+   return True;
+end if;
+ end;
+  end if;
+
+  Next_Entity (Comp);
+   end loop;
+end;
+ end if;
+
+ return False;
+  end Has_Discrim_Dep_Array;
+
+  --  Local declarations
+
   Typ : constant Entity_Id := Underlying_Type (Id);
 
--  Start of processing for New_Requires_Transient_Scope
@@ -17184,26 +17231,26 @@
   elsif Is_Tagged_Type (Typ) or else Has_Controlled_Component (Typ) then
  return not Is_Value_Type (Typ);
 
-  --  Indefinite (discriminated) untagged record or protected type
-
-  elsif Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
- return not Caller_Known_Size_Record (Typ);
- --  ???Should come after Is_Definite_Subtype below
-
   --  Untagged definite subtypes are known size. This includes all
   --  elementary [sub]types. Tasks are known size even if they have
   --  discriminants.
 
   elsif Is_Definite_Subtype (Typ) or else Is_Task_Type (Typ) then
- if Is_Array_Type (Typ) -- ???Shouldn't be necessary
-   and then New_Requires_Transient_Scope
-  (Underlying_Type (Component_Type (Typ)))
- then
-return True;
+ if Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
+if not Has_Discriminants (Typ) then
+   if Has_Discrim_Dep_Array (Typ) then
+  return True; -- ???Shouldn't be necessary
+   end if;
+end if;
  end if;
 
  return False;
 
+  --  Indefinite (discriminated) untagged record or protected type
+
+  elsif Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
+ return not Caller_Known_Size_Record (Typ);
+
   --  Unconstrained array
 
   else

Re: [patch] libstdc++/65352 fix ubsan errors in std::array<T, 0>

2015-05-28 Thread Marc Glisse


On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never supposed to 
be used.


--
Marc Glisse

Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Jan Hubicka

Hi,
here is an updated version of the patch.  It makes alias_set_subset_of
symmetric for ptr_type_node and other pointer types, and moves the logic of
creating subsets to get_alias_set.

I tested that perlbmk works when built at -O3 on x86_64.

Bootstrapped/regtested x86_64-linux, OK?

Honza

* alias.c (alias_set_entry_d): Add is_pointer.
(alias_set_subset_of): Special case pointers.
(init_alias_set_entry): Break out from ...
(record_alias_subset): ... here.
(get_alias_set): Do less generous pointer globbing.
* gcc.dg/alias-8.c: Do not xfail.
* gcc.dg/pr62167.c: Prevent FRE.
Index: alias.c
===
--- alias.c (revision 223772)
+++ alias.c (working copy)
@@ -183,10 +184,6 @@ struct GTY(()) alias_set_entry_d {
   /* The alias set number, as stored in MEM_ALIAS_SET.  */
   alias_set_type alias_set;
 
-  /* Nonzero if would have a child of zero: this effectively makes this
- alias set the same as alias set zero.  */
-  int has_zero_child;
-
   /* The children of the alias set.  These are not just the immediate
  children, but, in fact, all descendants.  So, if we have:
 
@@ -195,6 +192,15 @@ struct GTY(()) alias_set_entry_d {
  continuing our example above, the children here will be all of
  `int', `double', `float', and `struct S'.  */
   hash_mapint, int, alias_set_traits *children;
+
+  /* Nonzero if would have a child of zero: this effectively makes this
+ alias set the same as alias set zero.  */
+  bool has_zero_child;
+  /* Nonzero if alias set corresponds to pointer type itself (i.e. not to
+ aggregate contaiing pointer.
+ This is used for a special case where we need an universal pointer type
+ compatible with all other pointer types.  */
+  bool is_pointer;
 };
 typedef struct alias_set_entry_d *alias_set_entry;
 
@@ -460,12 +466,33 @@ alias_set_subset_of (alias_set_type set1
   if (set2 == 0)
 return true;
 
-  /* Otherwise, check if set1 is a subset of set2.  */
+  /* Check if set1 is a subset of set2.  */
   ase = get_alias_set_entry (set2);
   if (ase != 0
(ase-has_zero_child
  || ase-children-get (set1)))
 return true;
+
+  /* As a special case we consider alias set of void * to be both subset
+ and superset of every alias set of a pointer.  This extra symmetry does
+ not matter for alias_sets_conflict_p but it makes 
aliasing_component_refs_p
+ to return true on the following testcase:
+
+ void *ptr;
+ char **ptr2=(char **)ptr;
+ *ptr2 = ...
+
+ This makes void * truly universal pointer type.  See pointer handling in
+ get_alias_set for more details.  */
+  if (ase  ase-is_pointer)
+{
+  alias_set_entry ase1 = get_alias_set_entry (set1);
+
+  if (ase1  ase1-is_pointer
+  (set1 == TYPE_ALIAS_SET (ptr_type_node)
+ || set2 == TYPE_ALIAS_SET (ptr_type_node)))
+   return true;
+}
   return false;
 }
 
@@ -764,6 +791,21 @@ alias_ptr_types_compatible_p (tree t1, t
  == TYPE_MAIN_VARIANT (TREE_TYPE (t2)));
 }
 
+/* Create emptry alias set entry.  */
+
+alias_set_entry
+init_alias_set_entry (alias_set_type set)
+{
+  alias_set_entry ase = ggc_cleared_allocalias_set_entry_d ();
+  ase-alias_set = set;
+  ase-children
+= hash_mapint, int, alias_set_traits::create_ggc (64);
+  ase-has_zero_child = 0;
+  gcc_checking_assert (!get_alias_set_entry (set));
+  (*alias_sets)[set] = ase;
+  return ase;
+}
+
 /* Return the alias set for T, which may be either a type or an
expression.  Call language-specific routine for help, if needed.  */
 
@@ -903,35 +945,92 @@ get_alias_set (tree t)
  the pointed-to types.  This issue has been reported to the
  C++ committee.
 
- In addition to the above canonicalization issue, with LTO
- we should also canonicalize `T (*)[]' to `T *' avoiding
- alias issues with pointer-to element types and pointer-to
- array types.
-
- Likewise we need to deal with the situation of incomplete
- pointed-to types and make `*(struct X **)a' and
- `*(struct X {} **)a' alias.  Otherwise we will have to
- guarantee that all pointer-to incomplete type variants
- will be replaced by pointer-to complete type variants if
- they are available.
-
- With LTO the convenient situation of using `void *' to
- access and store any pointer type will also become
- more apparent (and `void *' is just another pointer-to
- incomplete type).  Assigning alias-set zero to `void *'
- and all pointer-to incomplete types is a not appealing
- solution.  Assigning an effective alias-set zero only
- affecting pointers might be - by recording proper subset
- relationships of all pointer alias-sets.
-
- Pointer-to function types are another grey area which
- needs caution.  Globbing them all into one alias-set
- or the above effective zero set would work.
-
- For now just

Re: [patch] libstdc++/65352 fix ubsan errors in std::array<T, 0>

2015-05-28 Thread Jonathan Wakely


On 28/05/15 14:38 +0100, Jonathan Wakely wrote:

On 28/05/15 15:26 +0200, Marc Glisse wrote:

On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never 
supposed to be used.


That's a good idea, I experimented with just not defining it but that
fails for explicit instantiations of array<T, 0>.


Would there be a danger of an object compiled with gcc-5.1 that calls
array<T, 0>::data() finding the _S_ref from an object compiled with
gcc-5.2 and hitting the __builtin_unreachable in valid code?

Re: [patch] libstdc++/65352 fix ubsan errors in std::array<T, 0>

2015-05-28 Thread Jonathan Wakely


On 28/05/15 15:26 +0200, Marc Glisse wrote:

On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never 
supposed to be used.


That's a good idea, I experimented with just not defining it but that
fails for explicit instantiations of array<T, 0>.

Re: [gomp4] Preserve NVPTX reconvergence points

2015-05-28 Thread Richard Biener

On Thu, May 28, 2015 at 4:06 PM, Julian Brown jul...@codesourcery.com wrote:
 For NVPTX, it is vitally important that the divergence of threads
 within a warp can be controlled: in particular we must be able to
 generate code that we know reconverges at a particular point.
 Unfortunately GCC's middle-end optimisers can cause this property to
 be violated, which causes problems for the OpenACC execution model
 we're planning to use for NVPTX.

 As a brief example: code running in vector-single mode runs on a
 single thread of a warp, and must broadcast condition results to other
 threads of the warp so that they can follow along and be ready for
 vector-partitioned execution when necessary.

 #pragma acc parallel
 {
   #pragma acc loop gang
   for (i = 0; i < N; i++)
   {
 /* This is vector-single mode.  */
 n = ...;
 switch (n)
 {
 case 1:
   #pragma acc loop vector
   for (...)
   {
 /* This is vector-partitioned mode.  */
   }
   ...
 }
   }
 }

 Here, the calculation n = ... takes place on a single thread (of
 each partitioned gang of the outer loop), but the switch statement
 (terminating the BB) must be executed by all threads in the warp. The
 vector-single statements will be translated using a branch around for
 the idle threads:

 if (threadIdx.x == 0)
 {
   n_0 = ...;
 }
 n_x = broadcast (n_0)
 switch (n_x)
 ...

 Where broadcast is an operation that transfers values from some
 other thread of a warp (i.e., the zeroth) to the current thread
 (implemented as a shfl instruction for NVPTX).

 I observed a similar example to this cloning the broadcast and switch
 instructions (in the .dom1 dump), along the lines of:

 if (threadIdx.x == 0)
 {
   n_0 = ...;
   n_x = broadcast (n_0)
   switch (n_x)
   ...
 }
 else
 {
   n_x = broadcast (n_0)
   switch (n_x)
   ...
 }

 This doesn't work because the broadcast operation has to be run with
 non-diverged warps for correct operation, and here there is divergence
 due to the if (threadIdx.x == 0) condition.

 So, the way I have tried to handle this is by attempting to inhibit
 optimisation along edges which have a reconvergence point as their
 destination. The essential idea is to make such edges abnormal,
 although the existing EDGE_ABNORMAL flag is not used because that has
 implicit meaning built into it already, and the new edge type may need
 to be handled differently in some areas. One example is that at
 present, blocks concluding with GIMPLE_COND cannot have EDGE_ABNORMAL
 set on their EDGE_TRUE or EDGE_FALSE outgoing edges.

 The attached patch introduces a new edge flag (EDGE_TO_RECONVERGENCE),
 for the GIMPLE CFG only. In principle there's nothing to stop the flag
 being propagated to the RTL CFG also, in which case it'd probably be
 set at the same time as EDGE_ABNORMAL, mirroring the semantics of e.g.
 EDGE_EH, EDGE_ABNORMAL_CALL and EDGE_SIBCALL. Then, passes which
 inspect the RTL CFG can continue to only check the ABNORMAL flag. But
 so far (in rather limited testing!), that has not been observed to be
 necessary. (We can control RTL CFG manipulation indirectly by using the
 CANNOT_COPY_INSN_P target hook, sensitive e.g. to the broadcast
 instruction.)

 For the GIMPLE CFG (i.e. in passes operating on GIMPLE form),
 EDGE_TO_RECONVERGENCE behaves mostly the same as EDGE_ABNORMAL (i.e.,
 inhibiting certain optimisations), and so has been added to relevant
 conditionals largely mechanically. Places where it is treated specially
 are:

 * tree-cfg.c:gimple_verify_flow_info does not permit EDGE_ABNORMAL on
   outgoing edges of a block concluding with a GIMPLE_COND statement.
   But, we allow EDGE_TO_RECONVERGENCE there.

 * tree-vrp.c:find_conditional_asserts skips over outgoing GIMPLE_COND
   edges with EDGE_TO_RECONVERGENCE set (avoiding an ICE when the pass
   tries to split the edge later).

 There are probably other optimisations that will be tripped up by the
 new flag along the same lines as the VRP tweak above, which we will no
 doubt discover in due course.

 Together with the patch,

   https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02612.html

 This shows no regressions for the libgomp tests.

 OK for gomp4 branch?

Hmm, I don't think adding a new edge flag is good nor necessary.  It seems to
me that instead the broadcast operation should have abnormal control flow
and thus basic-blocks should be split either before or after it (so either
incoming or outgoing edge(s) should be abnormal).  I suppose splitting
before the broadcast would be best (thus handle it similar to setjmp ()).

Richard.

 Thanks,

 Julian

 ChangeLog

 gcc/
 * basic-block.h (EDGE_COMPLEX): Add EDGE_TO_RECONVERGENCE flag.
 (bb_hash_abnorm_or_reconv_pred): New function.
 (hash_abnormal_or_eh_outgoing_edge_p): Consider
 EDGE_TO_RECONVERGENCE also.
 * cfg-flags.def (TO_RECONVERGENCE): Add flag.
 * omp-low.c (predicate_bb): Set EDGE_TO_RECONVERGENCE on edges
 leading to a reconvergence

PATCH: Mention --enable-default-pie in gcc-6/changes.html

2015-05-28 Thread H.J. Lu

OK to install?

H.J.
---
Index: gcc-6/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
retrieving revision 1.10
diff -u -p -r1.10 changes.html
--- gcc-6/changes.html  26 May 2015 10:12:08 -  1.10
+++ gcc-6/changes.html  28 May 2015 13:49:00 -
@@ -140,8 +140,12 @@ enum {
 
 
 <!-- .. -->
-<!-- <h2>Other significant improvements</h2> -->
+<h2>Other significant improvements</h2>
 
+  <ul>
+    <li>Added <code>--enable-default-pie</code> configure option to
+   generate PIE by default.</li>
+  </ul>
 
 </body>
 </html>

[PATCH, libgomp, x86]: Optimize i386 futex_wake syscall

2015-05-28 Thread Uros Bizjak

Hello!

This patch avoids unnecessary clearing of 4th argument for futex_wake
syscall for 32bit targets.

2015-05-28  Uros Bizjak  ubiz...@gmail.com

* config/linux/x86/futex.h (sys_futex0) [!__x86_64__]: Remove function.
(futex_wait) [!__x86_64__]: Use __asm operand instead of sys_futex0.
(futex_wake) [!__x86_64__]: Ditto.

Tested on x86_64-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: config/linux/x86/futex.h
===
--- config/linux/x86/futex.h(revision 223796)
+++ config/linux/x86/futex.h(working copy)
@@ -81,43 +81,49 @@
 #  define SYS_futex240
 # endif
 
-static inline long
-sys_futex0 (int *addr, int op, int val)
+static inline void
+futex_wait (int *addr, int val)
 {
   long res;
 
   __asm volatile (int $0x80
  : =a (res)
- : 0(SYS_futex), b (addr), c(op),
-   d(val), S(0)
+ : 0 (SYS_futex), b (addr), c (gomp_futex_wait),
+   d (val), S (0)
  : memory);
-  return res;
-}
-
-static inline void
-futex_wait (int *addr, int val)
-{
-  long res = sys_futex0 (addr, gomp_futex_wait, val);
   if (__builtin_expect (res == -ENOSYS, 0))
 {
   gomp_futex_wait = ~FUTEX_PRIVATE_FLAG;
   gomp_futex_wake = ~FUTEX_PRIVATE_FLAG;
-  sys_futex0 (addr, gomp_futex_wait, val);
+  __asm volatile (int $0x80
+ : =a (res)
+ : 0 (SYS_futex), b (addr), c (gomp_futex_wait),
+   d (val), S (0)
+ : memory);
 }
 }
 
 static inline void
 futex_wake (int *addr, int count)
 {
-  long res = sys_futex0 (addr, gomp_futex_wake, count);
+  long res;
+
+  __asm volatile (int $0x80
+ : =a (res)
+ : 0 (SYS_futex), b (addr), c (gomp_futex_wake),
+   d (count)
+ : memory);
   if (__builtin_expect (res == -ENOSYS, 0))
 {
   gomp_futex_wait = ~FUTEX_PRIVATE_FLAG;
   gomp_futex_wake = ~FUTEX_PRIVATE_FLAG;
-  sys_futex0 (addr, gomp_futex_wake, count);
+  __asm volatile (int $0x80
+ : =a (res)
+ : 0 (SYS_futex), b (addr), c (gomp_futex_wake),
+   d (count)
+ : memory);
 }
 }
-
 #endif /* __x86_64__ */
 
 static inline void

1 2 >

1 - 100 of 126 matches

Mail list logo