Re: [driver, LTO Patch]: Resurrect user specs support

2012-05-28 Thread Christian Bruel
Hello

On 05/22/2012 03:52 PM, Joseph S. Myers wrote:
 On Mon, 21 May 2012, Christian Bruel wrote:
 
 1) Lazily check the flag validation until all command line spec files
 are read. For this purpose, 'read_specs' records specs, to be analyzed
 with 'file_spec_p'. Such flags have 'live_cond' = SWITCH_USER
 
 I like the idea of allowing flags mentioned in user specs but not other 
 specs - but not the implementation using this new live_cond.  

OK, I have removed the SWITCH_USER flag and replaced it by a bool in the
struct spec_list. This field is now passed as parameter to
validate_switches. Maybe less central, but more flexible as you proposed.

 There are a 
 lot of places in gcc.c that set validated, and I can't convince myself 
 that this implementation will ensure they all take correct account of 
 where the relevant spec (if any) came from without causing any other 
 change to how the driver behaves.  For example, I don't see any change to 
 the setting of validated for %< and %> in do_spec_1 to account for where 
 the spec came from.
 
 Instead, I think that any function that sets validated based on a spec 
 should be passed the information about whether it's a user spec or not.  
 So validate_all_switches would need to pass that down to 
 validate_switches_from_spec, for example - and do_spec_1 would also need 
 to get that information.

I shared the same concern, however, after playing bits with spec toys, I
couldn't find a way to get a %< switch recognition failure, since the
switches passed on the command line at this point are already validated
if necessary.

I put a simple example of this in attachment to illustrate this. But I
might lack imagination to make up a regression.

We indeed now have all the information to pass down to the do_specs
interfaces, but this would be very intrusive, I'm reluctant to do it if
not strictly necessary. Do you see a way an invalid option could be
accidentally validated ? I would have thought that with the current
implementation invalid flags are detected earlier.

Cheers

Christian





Index: gcc/gcc.c
===
--- gcc/gcc.c	(revision 187500)
+++ gcc/gcc.c	(working copy)
@@ -190,8 +190,8 @@
 static void store_arg (const char *, int, int);
 static void insert_wrapper (const char *);
 static char *load_specs (const char *);
-static void read_specs (const char *, int);
-static void set_spec (const char *, const char *);
+static void read_specs (const char *, bool, bool);
+static void set_spec (const char *, const char *, bool);
 static struct compiler *lookup_compiler (const char *, size_t, const char *);
 static char *build_search_list (const struct path_prefix *, const char *,
 bool, bool);
@@ -227,9 +227,9 @@
 static void do_self_spec (const char *);
 static const char *find_file (const char *);
 static int is_directory (const char *, bool);
-static const char *validate_switches (const char *);
+static const char *validate_switches (const char *, bool);
 static void validate_all_switches (void);
-static inline void validate_switches_from_spec (const char *);
+static inline void validate_switches_from_spec (const char *, bool);
 static void give_switch (int, int);
 static int used_arg (const char *, int);
 static int default_arg (const char *, int);
@@ -1170,11 +1170,12 @@
   const char **ptr_spec;	/* pointer to the spec itself.  */
   struct spec_list *next;	/* Next spec in linked list.  */
   int name_len;			/* length of the name */
-  int alloc_p;			/* whether string was allocated */
+  bool user_p;			/* whether string come from file spec.  */
+  bool alloc_p;			/* whether string was allocated */
 };
 
 #define INIT_STATIC_SPEC(NAME,PTR) \
-{ NAME, NULL, PTR, (struct spec_list *) 0, sizeof (NAME) - 1, 0 }
+  { NAME, NULL, PTR, (struct spec_list *) 0, sizeof (NAME) - 1, false, false }
 
 /* List of statically defined specs.  */
 static struct spec_list static_specs[] =
@@ -1478,7 +1479,7 @@
current spec.  */
 
 static void
-set_spec (const char *name, const char *spec)
+set_spec (const char *name, const char *spec, bool user_p)
 {
   struct spec_list *sl;
   const char *old_spec;
@@ -1530,7 +1531,8 @@
   if (old_spec  sl-alloc_p)
 free (CONST_CAST(char *, old_spec));
 
-  sl-alloc_p = 1;
+  sl-user_p = user_p;
+  sl-alloc_p = true;
 }
 
 /* Accumulate a command (program name and args), and run it.  */
@@ -1686,7 +1688,7 @@
Anything invalid in the file is a fatal error.  */
 
 static void
-read_specs (const char *filename, int main_p)
+read_specs (const char *filename, bool main_p, bool user_p)
 {
   char *buffer;
   char *p;
@@ -1735,7 +1737,7 @@
 
 	  p[-2] = '\0';
 	  new_filename = find_a_file (startfile_prefixes, p1, R_OK, true);
-	  read_specs (new_filename ? new_filename : p1, FALSE);
+	  read_specs (new_filename ? new_filename : p1, false, user_p);
 	  continue;
 	}
 	  else if (!strncmp (p1, %include_noerr, sizeof %include_noerr - 1)
@@ -1756,7 +1758,7 

[PATCH 2/2] gcc symbol database

2012-05-28 Thread Yunfeng ZHANG
diff -upr .pc/symdb_enhance_plugin/gcc/c-family/c-common.h
gcc/c-family/c-common.h
--- .pc/symdb_enhance_plugin/gcc/c-family/c-common.h2011-12-21
04:44:13.0 +0800
+++ gcc/c-family/c-common.h2012-05-25 14:56:56.776263281 +0800
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.
 #include splay-tree.h
 #include cpplib.h
 #include ggc.h
+#include c-pragma.h

 /* In order for the format checking to accept the C frontend
diagnostic framework extensions, you must include this file before
@@ -1116,4 +1117,43 @@ struct GTY(()) tree_userdef_literal {

 extern tree build_userdef_literal (tree suffix_id, tree value, tree
num_string);

+/* The following local token type is used.  */
+
+/* A keyword.  */
+#define CPP_KEYWORD ((enum cpp_ttype) (N_TTYPES + 1))
+
+/* More information about the type of a CPP_NAME token.  */
+typedef enum c_id_kind {
+  /* An ordinary identifier.  */
+  C_ID_ID,
+  /* An identifier declared as a typedef name.  */
+  C_ID_TYPENAME,
+  /* An identifier declared as an Objective-C class name.  */
+  C_ID_CLASSNAME,
+  /* An address space identifier.  */
+  C_ID_ADDRSPACE,
+  /* Not an identifier.  */
+  C_ID_NONE
+} c_id_kind;
+
+/* A single C token after string literal concatenation and conversion
+   of preprocessing tokens to tokens.  */
+typedef struct GTY (()) c_token {
+  /* The kind of token.  */
+  ENUM_BITFIELD (cpp_ttype) type : 8;
+  /* If this token is a CPP_NAME, this value indicates whether also
+ declared as some kind of type.  Otherwise, it is C_ID_NONE.  */
+  ENUM_BITFIELD (c_id_kind) id_kind : 8;
+  /* If this token is a keyword, this value indicates which keyword.
+ Otherwise, this value is RID_MAX.  */
+  ENUM_BITFIELD (rid) keyword : 8;
+  /* If this token is a CPP_PRAGMA, this indicates the pragma that
+ was seen.  Otherwise it is PRAGMA_NONE.  */
+  ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
+  /* The value associated with this token, if any.  */
+  tree value;
+  /* The location at which this token was found.  */
+  location_t location;
+} c_token;
+
 #endif /* ! GCC_C_COMMON_H */
diff -upr .pc/symdb_enhance_plugin/gcc/c-family/c-lex.c gcc/c-family/c-lex.c
--- .pc/symdb_enhance_plugin/gcc/c-family/c-lex.c2011-10-27
03:31:16.0 +0800
+++ gcc/c-family/c-lex.c2012-05-25 14:56:56.767134882 +0800
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.
 #include splay-tree.h
 #include debug.h
 #include target.h
+#include plugin.h

 /* We may keep statistics about how long which files took to compile.  */
 static int header_time, body_time;
@@ -380,6 +381,7 @@ c_lex_with_flags (tree *value, location_
 case CPP_STRING32:
 case CPP_UTF8STRING:
   type = lex_string (tok, value, true, true);
+  tok = NULL;
   break;

 case CPP_NAME:
@@ -481,6 +483,7 @@ c_lex_with_flags (tree *value, location_
 {
   type = lex_string (tok, value, false,
  (lex_flags  C_LEX_STRING_NO_TRANSLATE) == 0);
+  tok = NULL;
   break;
 }
   *value = build_string (tok-val.str.len, (const char *)
tok-val.str.text);
@@ -515,6 +518,7 @@ c_lex_with_flags (tree *value, location_
 }

   timevar_pop (TV_CPP);
+  invoke_plugin_callbacks (PLUGIN_CPP_TOKEN, (cpp_token*) tok);

   return type;
 }
diff -upr .pc/symdb_enhance_plugin/gcc/c-parser.c gcc/c-parser.c
--- .pc/symdb_enhance_plugin/gcc/c-parser.c2011-12-21
04:44:13.0 +0800
+++ gcc/c-parser.c2012-05-25 14:56:56.772261126 +0800
@@ -121,45 +121,6 @@ c_parse_init (void)
C++).  It would then be possible to share more of the C and C++
lexer code, if desired.  */

-/* The following local token type is used.  */
-
-/* A keyword.  */
-#define CPP_KEYWORD ((enum cpp_ttype) (N_TTYPES + 1))
-
-/* More information about the type of a CPP_NAME token.  */
-typedef enum c_id_kind {
-  /* An ordinary identifier.  */
-  C_ID_ID,
-  /* An identifier declared as a typedef name.  */
-  C_ID_TYPENAME,
-  /* An identifier declared as an Objective-C class name.  */
-  C_ID_CLASSNAME,
-  /* An address space identifier.  */
-  C_ID_ADDRSPACE,
-  /* Not an identifier.  */
-  C_ID_NONE
-} c_id_kind;
-
-/* A single C token after string literal concatenation and conversion
-   of preprocessing tokens to tokens.  */
-typedef struct GTY (()) c_token {
-  /* The kind of token.  */
-  ENUM_BITFIELD (cpp_ttype) type : 8;
-  /* If this token is a CPP_NAME, this value indicates whether also
- declared as some kind of type.  Otherwise, it is C_ID_NONE.  */
-  ENUM_BITFIELD (c_id_kind) id_kind : 8;
-  /* If this token is a keyword, this value indicates which keyword.
- Otherwise, this value is RID_MAX.  */
-  ENUM_BITFIELD (rid) keyword : 8;
-  /* If this token is a CPP_PRAGMA, this indicates the pragma that
- was seen.  Otherwise it is PRAGMA_NONE.  */
-  ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
-  /* The location at which this token was found.  */
-  location_t location;
-  /* The value associated with this token, 

[RFA/ARM] Add ACLE Predefined macro support

2012-05-28 Thread Matthew Gretton-Dann

All,

This patch adds a variety of predefined macros to reveal the presence of
various features of the ARM architecture.  These are detailed in the ARM
C Language Extensions specification, available here:
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053-/index.html

This patch then adds compiler predefines for:

__ARM_SIZEOF_MINIMAL_ENUM which is defined as the size in bytes
of the smallest enum.

__ARM_ARCH which is defined as the major revision of the ARM
instruction set which the target implements.

__ARM_ARCH_ISA_THUMB which is defined as the major revision of
the thumb instruction set which the target implements.

__ARM_ARCH_PROFILE which is defined on ARMv7 targets, and ARMv6-M
targets to be the character value of `A', `R' or `M', as defined
by the target's architecture profile.

__ARM_FEATURE_LDREX which is defined as a bit mask, composed of
the widths of `ldrex' available on the target. These widths are:
bit 0 - byte.
bit 1 - 16-bit halfword.
bit 2 - 32-bit word.
bit 3 - 64-bit doubleword.

__ARM_FEATURE_CLZ which is defined for targets which support
the `clz' instruction.

__ARM_FEATURE_SIMD32 which is defined when the ARMv6 integer
SIMD instructions are available.

__ARM_FEATURE_QBIT which is defined when the Q-Bit is present in the
APSR.

__ARM_FEATURE_SAT which is defined when the saturation instructions are
available.

__ARM_FP which is defined as a bit mask composed of the widths
of floating-point types with hardware support on the target.
These widths are:
bit 1 - 16-bit half precision.
bit 2 - 32-bit single precision.
bit 3 - 64-bit double precision.

__ARM_FP16_FORMAT_IEEE which is defined when the IEEE 754-2008
standard for 16-bit floating point representation is used.

__ARM_FP16_FORMAT_ALTERNATIVE which is defined when the ARM
alternative standard for 16-bit floating point representation
is used.

__ARM_FEATURE_FMA which is defined when the fused multiply-accumulate
instructions are available for floating-point and/or Advanced SIMD
values.

__ARM_NEON_FP which is defined as a bit mask composed of the widths
of floating point values supported by the NEON hardware. These widths
are, as with __ARM_FP:
bit 1 - 16-bit half precision.
bit 2 - 32-bit single precision.
bit 3 - 64-bit double precision.

__ARM_WMMX which is defined where iwmmx operations are available
on the target.

As these macros may expand to something other than `1', we also update
cpp.texi to reflect this fact.

OK?

Thanks,

Matt

gcc/ChangeLog:
2012-05-28  Matthew Gretton-Dann  matthew.gretton-d...@arm.com
James Greenhalgh  james.greenha...@arm.com

* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add new built-ins.
(TARGET_FMA): New macro.
(TARGET_ARM_QBIT, TARGET_ARM_SAT): Likewise.
(TARGET_ARM_ARCH): Likewise.
(TARGET_ARM_ARCH_ISA_THUMB): Likewise.
(TARGET_V6M, TARGET_V7M): Likewise.
(TARGET_ARM_ARCH_PROFILE): Likewise.
(TARGET_ARM_FEATURE_LDREX): Likewise.
(TARGET_ARM_FP, TARGET_NEON_FP): Likewise.
(ARM_MIN_ENUM_SIZE): Likewise.
* config/arm/arm.c (arm_file_start): Refactor appropriately.
(base_architecture): New enumeration.
(arm_base_arch): New global variable.
(processors): Add field base_arch.
(ARM_ARCH, ARM_CORE): Adjust accordingly.
(arm_option_override): Add initialization of arm_base_arch.
* doc/cpp.texi (system-specific predefined macros.): Change.

gcc/testsuite/ChangeLog:
2012-05-28  Matthew Gretton-Dann  matthew.gretton-d...@arm.com
James Greenhalgh  james.greenha...@arm.com

* gcc.target/arm/ftest-support-arm.h: New testcase.
* gcc.target/arm/ftest-support-thumb.h: Likewise.
* gcc.target/arm/ftest-support.h: Likewise.
* gcc.target/arm/ftest-armv4-arm.c: Likewise.
* gcc.target/arm/ftest-armv4t-arm.c: Likewise.
* gcc.target/arm/ftest-armv4t-thumb.c: Likewise.
* gcc.target/arm/ftest-armv5t-arm.c: Likewise.
* gcc.target/arm/ftest-armv5t-thumb.c: Likewise.
* gcc.target/arm/ftest-armv5te-arm.c: Likewise.
* gcc.target/arm/ftest-armv5te-thumb.c: Likewise.
* gcc.target/arm/ftest-armv6-arm.c: Likewise.
* gcc.target/arm/ftest-armv6-thumb.c: Likewise.
* gcc.target/arm/ftest-armv6k-arm.c: Likewise.
* gcc.target/arm/ftest-armv6k-thumb.c: Likewise.
* gcc.target/arm/ftest-armv6m-thumb.c: Likewise.
* gcc.target/arm/ftest-armv6t2-arm.c: Likewise.
* gcc.target/arm/ftest-armv6t2-thumb.c: Likewise.
* gcc.target/arm/ftest-armv6z-arm.c: Likewise.
* gcc.target/arm/ftest-armv6z-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7a-arm.c: Likewise.
* gcc.target/arm/ftest-armv7a-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7m-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7em-thumb.c: Likewise.
* gcc.target/arm/ftest-armv7r-arm.c: Likewise.
* gcc.target/arm/ftest-armv7r-thumb.c: Likewise.
 

[ARM Patch 2/n]PR53447: optimizations of 64bit ALU operation with constant

2012-05-28 Thread Carrot Wei
Hi

This is the second part of the patches that deals with 64bit and. It directly
extends the patterns anddi3, anddi3_insn and anddi3_neon to handle 64bit
constant operands.

Tested on arm qemu without regression.

OK for trunk?

thanks
Carrot

2012-05-28  Wei Guozhi  car...@google.com

PR target/53447
* gcc.target/arm/pr53447-2.c: New testcase.


2012-05-28  Wei Guozhi  car...@google.com

PR target/53447
* config/arm/arm-protos.h (const_ok_for_anddi): New prototype.
* config/arm/arm.c (const_ok_for_anddi): New function.
* config/arm/constraints.md (De): New constraint.
* config/arm/predicates.md (arm_anddi_operand): New predicate.
(arm_immediate_anddi_operand): Likewise.
(anddi_operand): Likewise.
* config/arm/arm.md (anddi3): Extend it to handle 64bit constants.
(anddi3_insn): Likewise.
* config/arm/neon.md (anddi3_neon): Likewise.



Index: testsuite/gcc.target/arm/pr53447-2.c
===
--- testsuite/gcc.target/arm/pr53447-2.c(revision 0)
+++ testsuite/gcc.target/arm/pr53447-2.c(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options -O2 }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not mov } } */
+
+void t0p(long long * p)
+{
+  *p = 0x10002;
+}
Index: config/arm/arm.c
===
--- config/arm/arm.c(revision 187927)
+++ config/arm/arm.c(working copy)
@@ -2497,6 +2497,18 @@
 }
 }

+/* Return TRUE if int I is a valid immediate constant used by pattern
+   anddi3_insn.  */
+int
+const_ok_for_anddi (HOST_WIDE_INT i)
+{
+  HOST_WIDE_INT high = ARM_SIGN_EXTEND ((i  32)  0x);
+  HOST_WIDE_INT low = ARM_SIGN_EXTEND (i  0x);
+
+  return (TARGET_32BIT  (const_ok_for_arm (low) || const_ok_for_arm (~low))
+  (const_ok_for_arm (high) || const_ok_for_arm (~high)));
+}
+
 /* Emit a sequence of insns to handle a large constant.
CODE is the code of the operation required, it can be any of SET, PLUS,
IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===
--- config/arm/arm-protos.h (revision 187927)
+++ config/arm/arm-protos.h (working copy)
@@ -47,6 +47,7 @@
 extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern int const_ok_for_anddi (HOST_WIDE_INT);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
Index: config/arm/neon.md
===
--- config/arm/neon.md  (revision 187927)
+++ config/arm/neon.md  (working copy)
@@ -774,9 +774,9 @@
 )

 (define_insn anddi3_neon
-  [(set (match_operand:DI 0 s_register_operand =w,w,?r,?r,?w,?w)
-(and:DI (match_operand:DI 1 s_register_operand %w,0,0,r,w,0)
-   (match_operand:DI 2 neon_inv_logic_op2 w,DL,r,r,w,DL)))]
+  [(set (match_operand:DI 0 s_register_operand =w,w,?r,?r,?w,?w,?r,?r)
+(and:DI (match_operand:DI 1 s_register_operand %w,0,0,r,w,0,0,r)
+   (match_operand:DI 2 anddi_operand w,DL,r,r,w,DL,De,De)))]
   TARGET_NEON
 {
   switch (which_alternative)
@@ -788,12 +788,14 @@
 DImode, 1, VALID_NEON_QREG_MODE (DImode));
 case 2: return #;
 case 3: return #;
+case 6: return #;
+case 7: return #;
 default: gcc_unreachable ();
 }
 }
-  [(set_attr neon_type neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1)
-   (set_attr length *,*,8,8,*,*)
-   (set_attr arch nota8,nota8,*,*,onlya8,onlya8)]
+  [(set_attr neon_type neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1,*,*)
+   (set_attr length *,*,8,8,*,*,8,8)
+   (set_attr arch nota8,nota8,*,*,onlya8,onlya8,*,*)]
 )

 (define_insn ornmode3_neon
Index: config/arm/constraints.md
===
--- config/arm/constraints.md   (revision 187927)
+++ config/arm/constraints.md   (working copy)
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, De, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,12 @@
   (match_test TARGET_32BIT  arm_const_double_inline_cost (op) == 4
!(optimize_size || arm_ld_sched

+(define_constraint De
+ @internal
+  In ARM/Thumb-2 state a const_int that can be used by anddi3_insn.  
+ (and (match_code const_int)
+  (match_test TARGET_32BIT  const_ok_for_anddi (ival
+
 

Re: [driver, LTO Patch]: Resurrect user specs support

2012-05-28 Thread Joseph S. Myers
On Mon, 28 May 2012, Christian Bruel wrote:

 I shared the same concern, however, after playing bits with spec toys, I
 couldn't find a way to get a %< switch recognition failure, since the
 switches passed on the command line at this point are already validated
 if necessary.

Suppose with the existing sources an option (in a .opt file) is matched by 
a $ spec, and not by any other spec.  Will it be rejected by the driver?  
It shouldn't be.  Are you saying there is some pre-existing bug here, or 
that % validation happens in more than one place so some setting of 
validated is redundant but the code still works correctly?

-- 
Joseph S. Myers
jos...@codesourcery.com


[C++ Patch] PR 53503

2012-05-28 Thread Paolo Carlini

Hi,

apparently we have serious troubles with cmath and -std=c++11 and 
-fno-trapping-math because LTGT_EXPR is unhandled in 
potential_constant_expression_1. Thus, unless we have sound reasons to 
*not* handle it together with all the other *_EXPR, I think the below 
could safely go in mainline and 4_7-branch. Bootstrapped and tested 
x86_64-linux.


Ok?

Thanks,
Paolo.

///
gcc/cp
2012-05-28  Paolo Carlini  paolo.carl...@oracle.com

PR c++/53503
* semantics.c (potential_constant_expression_1): Handle LTGT_EXPR.

libstdc++-v3
2012-05-28  Paolo Carlini  paolo.carl...@oracle.com

PR c++/53503
* testsuite/26_numerics/headers/cmath/53503.cc: New.
Index: libstdc++-v3/testsuite/26_numerics/headers/cmath/53503.cc
===
--- libstdc++-v3/testsuite/26_numerics/headers/cmath/53503.cc   (revision 0)
+++ libstdc++-v3/testsuite/26_numerics/headers/cmath/53503.cc   (revision 0)
@@ -0,0 +1,21 @@
+// { dg-options -std=gnu++11 -fno-trapping-math }
+// { dg-do compile }
+
+// Copyright (C) 2012 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// http://www.gnu.org/licenses/.
+
+#include cmath
Index: gcc/cp/semantics.c
===
--- gcc/cp/semantics.c  (revision 187917)
+++ gcc/cp/semantics.c  (working copy)
@@ -8487,6 +8487,7 @@ potential_constant_expression_1 (tree t, bool want
 case UNGT_EXPR:
 case UNGE_EXPR:
 case UNEQ_EXPR:
+case LTGT_EXPR:
 case RANGE_EXPR:
 case COMPLEX_EXPR:
   want_rval = true;


Re: [RFA/ARM] Add ACLE Predefined macro support

2012-05-28 Thread Joseph S. Myers
On Mon, 28 May 2012, Matthew Gretton-Dann wrote:

 This patch adds a variety of predefined macros to reveal the presence of
 various features of the ARM architecture.  These are detailed in the ARM
 C Language Extensions specification, available here:
 http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053-/index.html

Are there any plans to implement the change in __fp16 semantics in this 
document (single rounding for conversion from double to __fp16, whereas 
the specification previously implemented was double rounding)?  And, more 
generally, the various features in the document not currently implemented 
in GCC or implemented differently from the specification?

 __ARM_FEATURE_FMA which is defined when the fused multiply-accumulate
 instructions are available for floating-point and/or Advanced SIMD
 values.

Note that the ARM port is currently lacking the fma instruction patterns 
to implement the __builtin_fma* built-in functions for processors with 
those instructions.  Support for those would be a straightforward and 
useful addition to GCC.

Is there a reason the ACLE doesn't include a predefined macro to say 
whether registers d16-d31 are known at compile time to be available?  That 
would occasionally be useful (see my comments in 
http://sourceware.org/ml/libc-ports/2012-04/msg00087.html, for example).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [driver, LTO Patch]: Resurrect user specs support

2012-05-28 Thread Christian Bruel


On 05/28/2012 01:11 PM, Joseph S. Myers wrote:
 On Mon, 28 May 2012, Christian Bruel wrote:
 
 I shared the same concern, however, after playing bits with spec toys, I
 couldn't find a way to get a %< switch recognition failure, since the
 switches passed on the command line at this point are already validated
 if necessary.
 
 Suppose with the existing sources an option (in a .opt file) is matched by 
 a $ spec, and not by any other spec.  Will it be rejected by the driver?  
 It shouldn't be. 

indeed, it's not rejected if it is present in a .opt file. I was
concerned that it will not be rejected even if not in any .opt (or now
in --specs). Which was what the validated setting seemed to imply.

Should it be rejected ? probably. But this is not implied by the --spec
changes.

 Are you saying there is some pre-existing bug here, or
 that % validation happens in more than one place so some setting of 
 validated is redundant but the code still works correctly?
 

I think the check

 if (! switches[i].validated)
  error

is already done when we process the do_spec for user specs.

It seems that there is no need to check for user option and set
'validated' in the cases ':,'', in do_spec_1 because if the switch
was not valid (not present in any .opt and not present in a user --spec)
it would already have been rejected.


Thanks

Christian


Re: [cxx-conversion] New Hash Table (issue6244048)

2012-05-28 Thread Jakub Jelinek
On Fri, May 25, 2012 at 02:42:39PM -0700, Lawrence Crowl wrote:
 On 5/24/12, Jakub Jelinek ja...@redhat.com wrote:
  On Thu, May 24, 2012 at 09:43:42AM -0700, Lawrence Crowl wrote:
   Add a type-safe hash table, typed_htab.  Uses of this table
   replace uses of libiberty's htab_t.  The benefits include less
   boiler-plate code, full type safety, and improved performance.
 
  You haven't looked at the most important problem of that approach
  - code bloat.
 
 Are you claiming that the size of the binary is more important than
 run-time performance, type safety, and source code size?

Runtime performance goes in hand with the size of the binary, at least
size of frequently used code.  By converting just a couple of hash tables
you can't really measure it, you'd need to convert a significant number of
them, then you can see what effect it has on runtime performance.
As said earlier, GCC has lots of hash tables, and many of them are used
in performance critical code, by increasing the I-cache footprint of
that performance-critical code there is a risk of reducing performance.
The common C++ programming techniques often lead to significant code bloat
which really shouldn't be ignored.

Jakub


Re: [cxx-conversion] New Hash Table (issue6244048)

2012-05-28 Thread Jakub Jelinek
On Fri, May 25, 2012 at 05:43:20PM -0500, Gabriel Dos Reis wrote:
 On Fri, May 25, 2012 at 4:52 PM, Lawrence Crowl cr...@google.com wrote:
 
  Personally, I would rather see if we can take advantage of C++
  features to reduce garbage and then use the Boehm collector.
  There is too much manual management with GTY, and I'd rather the
  compiler leverage mainstream practice rather than depart from it.
 
 I could not agree more.

I could not agree less, replacing a nicely precise garbage collector
with a conservative collector?  Ugh.  I really want a deterministic compiler,
not one where bugs won't be really reproducible because with ASLR
the conservative collector collects or does not collect something at some point.

Jakub


Re: [AARCH64] [PATCH 3/3] AArch64 Port

2012-05-28 Thread Tejas Belagod


Hi Richard,

Thanks for your comments. Some questions inline below.

Richard Sandiford wrote:

Marcus Shawcroft marcus.shawcr...@arm.com writes:

This patch adds an implementation of integer iterators.


Nice.  A few comments from an onlooker (on top of what Stephen said).


+/* Since GCC does not construct a table of valid constants,
+   we have to accept any int as valid.  No cross-checking can
+   be done.  */
+static int
+find_int (const char *name)
+{
+  char *endptr;
+  int ret;
+
+  if (ISDIGIT (*name))
+{
+  ret = strtol (name, endptr, 0);
+  gcc_assert (*endptr == '\0');


I think this should be an error rather than an assert.


+/* Stand-alone int iterator usage-checking function.  */
+static bool
+uses_int_iterator_p (rtx x, struct mapping *iterator, int opno)
+{
+  int i;
+  for (i=0; i  num_int_iterator_data; i++)
+if (int_iterator_data[i].iterator-group == iterator-group 
+   int_iterator_data[i].iterator-index == iterator-index)


Formatting: && should be at the beginning of the second line.


+  {
+   /* Found an existing entry. Check if X is in its list.  */
+   struct int_iterator_mapping it = int_iterator_data[i];
+   int j;
+
+   for (j=0; j  it.num_rtx; j++)
+   {
+ if (it.rtxs[j].x == x  it.rtxs[j].opno == opno)
+   return true;
+   }


Formatting: redundant { ... }.

It might be easier to store a pointer to XEXP (x, opno) than storing
x and opno separately.


+  }
+  return false;
+}
+
 /* Map a code or mode attribute string P to the underlying string for
ITERATOR and VALUE.  */
 
@@ -341,7 +414,9 @@

   x = rtx_alloc (bellwether_code);
   memcpy (x, original, RTX_CODE_SIZE (bellwether_code));
 
-  /* Change the mode or code itself.  */

+  /* Change the mode or code itself.
+ For int iterators, apply_iterator () does nothing. This is
+ because we want to apply int iterators to operands below.  */


The way I imagined this working is that we'd just walk a list of
rtx * pointers for the current iterator and substitute the current
iterator value.  Then we'd take a deep copy of the rtx once all
iterators had been applied.  Checking every operand against the
substitution table seems a bit round-about.



I understand how this would work for mode and code iterators, but I'm a 
bit confused about how it would for int iterators. Don't we have to 
traverse each operand to figure out which ones to substitute for an int 
iterator value? Also, when you say take a deep copy after all the 
iterators have been applied, do you mean code, mode and int iterators or 
do you mean values of a particular iterator? As I understand the current 
implementation, mode and code iterators use placeholder integral 
constants that are replaced with actual iterator values during the rtx 
traverse. If we take a deep copy after the replacement, won't we lose 
these placeholder codes?


Thanks,
Tejas.


It'd be good to do the same for codes and modes, but I'll volunteer
to do that as a follow-up.


+/* Add to triplet-database for int iterators.  */
+static void
+add_int_iterator (struct mapping *iterator, rtx x, int opno)
+{
+
+  /* Find iterator in int_iterator_data. If already present,
+ add this R to its list of rtxs. If not present, create
+ a new entry for INT_ITERATOR_DATA and add the R to its
+ rtx list.  */
+  int i;
+  for (i=0; i  num_int_iterator_data; i++)
+if (int_iterator_data[i].iterator-index == iterator-index)
+  {
+   /* Found an existing entry. Add rtx to this iterator's list.  */
+   int_iterator_data[i].rtxs =
+   XRESIZEVEC (struct rtx_list,
+   int_iterator_data[i].rtxs,
+   int_iterator_data[i].num_rtx + 1);
+   int_iterator_data[i].rtxs[int_iterator_data[i].num_rtx].x = x;
+   int_iterator_data[i].rtxs[int_iterator_data[i].num_rtx].opno = opno;
+   int_iterator_data[i].num_rtx++;
+   return;
+  }
+
+  /* New INT_ITERATOR_DATA entry.  */
+  if (num_int_iterator_data == 0)
+int_iterator_data = XNEWVEC (struct int_iterator_mapping, 1);
+  else
+int_iterator_data = XRESIZEVEC (struct int_iterator_mapping,
+   int_iterator_data,
+   num_int_iterator_data + 1);
+  int_iterator_data[num_int_iterator_data].iterator = iterator;
+  int_iterator_data[num_int_iterator_data].rtxs = XNEWVEC (struct rtx_list, 1);
+  int_iterator_data[num_int_iterator_data].rtxs[0].x = x;
+  int_iterator_data[num_int_iterator_data].rtxs[0].opno = opno;
+  int_iterator_data[num_int_iterator_data].num_rtx = 1;
+  num_int_iterator_data++;
+}


VECs might be better here.


@@ -1057,14 +1227,30 @@
XWINT (return_rtx, i) = tmp_wide;
break;
 
-  case 'i':

   case 'n':
-   read_name (name);
validate_const_int (name.string);
tmp_int = atoi (name.string);
XINT (return_rtx, i) = tmp_int;
break;
-

Re: [RFA/ARM] Add ACLE Predefined macro support

2012-05-28 Thread Matthew Gretton-Dann

On 28/05/12 12:27, Joseph S. Myers wrote:

On Mon, 28 May 2012, Matthew Gretton-Dann wrote:


This patch adds a variety of predefined macros to reveal the presence of
various features of the ARM architecture.  These are detailed in the ARM
C Language Extensions specification, available here:
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053-/index.html


Are there any plans to implement the change in __fp16 semantics in this
document (single rounding for conversion from double to __fp16, whereas
the specification previously implemented was double rounding)?  And, more
generally, the various features in the document not currently implemented
in GCC or implemented differently from the specification?


Yes.  We have these implemented against 4.7 internally, and are slowly 
rebasing these to trunk.  Unfortunately, I have no timescale for when these 
will be released.



__ARM_FEATURE_FMA which is defined when the fused multiply-accumulate
instructions are available for floating-point and/or Advanced SIMD
values.


Note that the ARM port is currently lacking the fma instruction patterns
to implement the __builtin_fma* built-in functions for processors with
those instructions.  Support for those would be a straightforward and
useful addition to GCC.


I have a patchset currently under test that will add FMA support to the ARM 
backend (both for VFP and Neon).  Hopefully this will be sent for community 
review sometime this week.



Is there a reason the ACLE doesn't include a predefined macro to say
whether registers d16-d31 are known at compile time to be available?  That
would occasionally be useful (see my comments in
http://sourceware.org/ml/libc-ports/2012-04/msg00087.html, for example).


ACLE is interested in C language extensions.  So, in general, it only notes 
architecture features that the presence/absence of would change how you 
would write your C code.


The number of registers available in the VFP unit is of no interest to a C 
programmer, and so ACLE doesn't provide a feature test macro for it.


ACLE does not provide support for those writing assembly directly.

I agree, however, that from a GCC+Binutils toolchain perspective, feature 
test macros for such features would be useful.


Thanks,

Matt


--
Matthew Gretton-Dann
Principal Engineer, PD Software - Tools, ARM Ltd



Re: [i386] New testcase (was: [rtl, patch] combine concat+shuffle)

2012-05-28 Thread Marc Glisse


Ping? The rest of the patch has been approved already.


On Thu, 10 May 2012, Marc Glisse wrote:


Hello,

could an i386 maintainer take a look at the following testcase?

gcc/testsuite/ChangeLog
2012-05-08  Marc Glisse  marc.gli...@inria.fr

* gcc.target/i386/shuf-concat.c: New test.


--- gcc.target/i386/shuf-concat.c   (revision 0)
+++ gcc.target/i386/shuf-concat.c   (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options -O -msse2 -mfpmath=sse } */
+
+typedef double v2df __attribute__ ((__vector_size__ (16)));
+
+v2df f(double d,double e){
+  v2df x={-d,d};
+  v2df y={-e,e};
+  return __builtin_ia32_shufpd(x,y,1);
+}
+
+/* { dg-final { scan-assembler-not \tv?shufpd\t } } */
+/* { dg-final { scan-assembler-times \tv?unpcklpd\t 1 } } */


The conversation on this patch started at 
http://gcc.gnu.org/ml/gcc-patches/2012-05/msg00504.html



On Tue, 8 May 2012, Marc Glisse wrote:


On Tue, 8 May 2012, Richard Sandiford wrote:


Marc Glisse marc.gli...@inria.fr writes:

Here is a new version.

gcc/ChangeLog
2012-05-08  Marc Glisse  marc.gli...@inria.fr

* simplify-rtx.c (simplify_binary_operation_1): Optimize shuffle
of concatenations.


OK, thanks.  I'll leave an x86 maintainer to review the testcase,
but it looks like it'll need some markup to ensure an SSE target.


Oups, I'd thought about that, then completely forgot. For 64 bits, it 
always works. For 32 bits, it requires -msse2 -mfpmath=sse (without 
-mfpmath=sse we can still test for shufpd, but apparently not unpcklpd, I 
could remove that second test if people prefer, as it isn't important). 
Since this is a compile-only test, I think this would be enough:


/* { dg-options -O -msse2 -mfpmath=sse } */


Note to self: if you want to grep for shuf in the asm, don't put shuf
in the name of the file...


Yeah :-)  For MIPS tests I tend to add \t to the beginning of the 
regexp.

(And to the end if possible.)


Good idea. I was trying to make the check as wide as possible, but that's 
not so useful. Attached a new version of the testcase.


--
Marc Glisse


Re: [cxx-conversion] New Hash Table (issue6244048)

2012-05-28 Thread Vladimir Makarov

On 05/25/2012 01:10 PM, Paweł Sikora wrote:

On Friday 25 of May 2012 11:50:13 Gabriel Dos Reis wrote:

On Fri, May 25, 2012 at 11:44 AM, Paweł Sikorapl...@agmk.net  wrote:


so, why you just don't use the hash table implementation from libstdc++?

we have agreed on C++03 as a bootstrap compiler.
There is unfortunately no hash table in C++03.

can't you use implementation from tr1 for c++98/03 mode?

It would be interesting to see the result of using C++ standard hash 
tables and I hope somebody will finally try this.  But I doubt the 
performance results will be better.


C++ standard hash tables are based on buckets.  Libiberty hash tables 
have no buckets.  That was a major idea behind the implementation more than 20 
years ago, and behind including it in gcc (more than 13 years ago).  A hash table 
without buckets permits 2-3 times more entries for the same space than 
a hash table with buckets and that, I guess, compensates for a slightly 
bigger collision rate when buckets are not used.  It also makes the 
table entry search code very compact, simple, and fast.




[committed] Add testcase for PR tree-optimization/53505

2012-05-28 Thread Jakub Jelinek
Hi!

As we have no short testcase for PR53438, I've committed
the following testcase as obvious to trunk and 4.7.

2012-05-28  Jakub Jelinek  ja...@redhat.com

PR tree-optimization/53505
* c-c++-common/torture/pr53505.c: New test.

--- gcc/testsuite/c-c++-common/torture/pr53505.c.jj 2012-05-28 
16:20:13.927753639 +0200
+++ gcc/testsuite/c-c++-common/torture/pr53505.c2012-05-28 
16:20:52.832528256 +0200
@@ -0,0 +1,42 @@
+/* PR tree-optimization/53505 */
+/* { dg-do run } */
+
+#include <stdbool.h>
+
+struct A
+{
+  unsigned int a;
+  unsigned char c1, c2;
+  bool b1 : 1;
+  bool b2 : 1;
+  bool b3 : 1;
+};
+
+void
+foo (const struct A *x, int y)
+{
+  int s = 0, i;
+  for (i = 0; i < y; ++i)
+{
+  const struct A a = x[i];
+  s += a.b1 ? 1 : 0;
+}
+  if (s != 0)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  struct A x[100];
+  int i;
+  __builtin_memset (x, -1, sizeof (x));
+  for (i = 0; i < 100; i++)
+{
+  x[i].b1 = false;
+  x[i].b2 = false;
+  x[i].b3 = false;
+}
+  foo (x, 100);
+  return 0;
+}

Jakub


Re: [PATCH] PR bootstrap/53459 - unused local typedef when building on altivec

2012-05-28 Thread Dominique Dhumieres
I have regstrapped r187893 with the following patch

[karma] gcc/darwin_buildw% diff -up ../_gcc_clean/libcpp/lex.c 
../work/libcpp/lex.c
--- ../_gcc_clean/libcpp/lex.c  2012-05-25 08:54:05.0 +0200
+++ ../work/libcpp/lex.c2012-05-27 13:25:08.0 +0200
@@ -592,7 +592,8 @@ search_line_fast (const uchar *s, const 
 
 union {
   vc v;
-  unsigned long l[N];
+  /* Statically assert that N is 2 or 4.  */
+  unsigned long l[(N == 2 || N == 4) ? N : -1];
 } u;
 unsigned long l, i = 0;
 

without related regression.

 You can simply rename the 'l' array to the error you want to output, so you 
 get an error like:

 error: size of array 'the_vector_size_has_to_be_the_size_of_2_or_4_long' is 
 negative

I am not convinced that will help anyone trying to extend the actual
implementation for an altivec platform with N != 2 or 4 (though I
seriously doubt that'll happen). Maybe the comment could be made
more explicit along the lines of
"Generates error: size of array 'l' is negative at compile time
if the vector size is not equal to the size of 2 or 4 long"
(feel free to convert Frenglish to native English).

Dominique


Re: [i386] New testcase (was: [rtl, patch] combine concat+shuffle)

2012-05-28 Thread Uros Bizjak
On Mon, May 28, 2012 at 3:37 PM, Marc Glisse marc.gli...@inria.fr wrote:

 Ping? The rest of the patch has been approved already.


 On Thu, 10 May 2012, Marc Glisse wrote:

 Hello,

 could an i386 maintainer take a look at the following testcase?

 gcc/testsuite/ChangeLog
 2012-05-08  Marc Glisse  marc.gli...@inria.fr

        * gcc.target/i386/shuf-concat.c: New test.


 --- gcc.target/i386/shuf-concat.c       (revision 0)
 +++ gcc.target/i386/shuf-concat.c       (revision 0)
 @@ -0,0 +1,13 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -msse2 -mfpmath=sse } */
 +
 +typedef double v2df __attribute__ ((__vector_size__ (16)));
 +
 +v2df f(double d,double e){
 +  v2df x={-d,d};
 +  v2df y={-e,e};
 +  return __builtin_ia32_shufpd(x,y,1);
 +}
 +
 +/* { dg-final { scan-assembler-not \tv?shufpd\t } } */
 +/* { dg-final { scan-assembler-times \tv?unpcklpd\t 1 } } */


 The conversation on this patch started at
 http://gcc.gnu.org/ml/gcc-patches/2012-05/msg00504.html


 On Tue, 8 May 2012, Marc Glisse wrote:

 On Tue, 8 May 2012, Richard Sandiford wrote:

 Marc Glisse marc.gli...@inria.fr writes:

 Here is a new version.

 gcc/ChangeLog
 2012-05-08  Marc Glisse  marc.gli...@inria.fr

        * simplify-rtx.c (simplify_binary_operation_1): Optimize shuffle
        of concatenations.


 OK, thanks.  I'll leave an x86 maintainer to review the testcase,
 but it looks like it'll need some markup to ensure an SSE target.


 Oups, I'd thought about that, then completely forgot. For 64 bits, it
 always works. For 32 bits, it requires -msse2 -mfpmath=sse (without
 -mfpmath=sse we can still test for shufpd, but apparently not unpcklpd, I
 could remove that second test if people prefer, as it isn't important).
 Since this is a compile-only test, I think this would be enough:

 /* { dg-options -O -msse2 -mfpmath=sse } */

 Note to self: if you want to grep for shuf in the asm, don't put
 shuf
 in the name of the file...


 Yeah :-)  For MIPS tests I tend to add \t to the beginning of the
 regexp.
 (And to the end if possible.)


 Good idea. I was trying to make the check as wide as possible, but that's
 not so useful. Attached a new version of the testcase.

Please add \[ \t\] at the end of add string instead of only \t.

OK with that change.

Thanks,
Uros.


Committed: typos in atomic patterns docs, md.texi

2012-05-28 Thread Hans-Peter Nilsson
Committed as obvious.

gcc:
* doc/md.texi (Standard Names): Fix typos in documentation of atomic 
patterns.

Index: doc/md.texi
===
--- doc/md.texi (revision 187934)
+++ doc/md.texi (working copy)
@@ -5885,7 +5885,7 @@ from a compare-and-swap operation, if de
 @itemx @samp{sync_old_ior@var{mode}}, @samp{sync_old_and@var{mode}}
 @itemx @samp{sync_old_xor@var{mode}}, @samp{sync_old_nand@var{mode}}
 
-These patterns are emit code for an atomic operation on memory,
+These patterns emit code for an atomic operation on memory,
 and return the value that the memory contained before the operation.
 Operand 0 is the result value, operand 1 is the memory on which the
 atomic operation is performed, and operand 2 is the second operand
@@ -6037,7 +6037,7 @@ performed.  Operand 1 is the second oper
 Operand 2 is the memory model to be used by the operation.
 
 If these patterns are not defined, attempts will be made to use legacy
-@code{sync} patterns, or equivilent patterns which return a result.  If
+@code{sync} patterns, or equivalent patterns which return a result.  If
 none of these are available a compare-and-swap loop will be used.
 
 @cindex @code{atomic_fetch_add@var{mode}} instruction pattern
@@ -6079,7 +6079,7 @@ performed.  Operand 2 is the second oper
 Operand 3 is the memory model to be used by the operation.
 
 If these patterns are not defined, attempts will be made to use legacy
-@code{sync} patterns, or equivilent patterns which return the result before
+@code{sync} patterns, or equivalent patterns which return the result before
 the operation followed by the arithmetic operation required to produce the
 result.  If none of these are available a compare-and-swap loop will be
 used.

brgds, H-P


[SH] PR 52941 - Add support for movco.l / movli.l atomics on SH4A

2012-05-28 Thread Oleg Endo
Hello,

The attached patch is the same as the last proposed patch in the PR.
Re-tested it on sh-sim against rev 187914 with -msoft-atomic and
-mhard-atomic enabled, as well with 'make info dvi pdf'.

I was hoping that after recent reload patches the R0 spill failure
problem
(see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50751#c29 ) might go
away, but it didn't.  The gcc.c-torture/compile/sync-1.c test still
fails when -mhard-atomic is enabled.
Actually this patch does not introduce the spill failure problem, the
sync-1 test case just makes it obvious.  The actual cause comes from the
patches of PR 50751 and (as far as I understand) some missing cases in
reload, which should be addressed separately, I think.

OK to apply?

Cheers,
Oleg

ChangeLog:

PR target/52941
* config/sh/predicates.md (atomic_arith_operand, 
atomic_logical_operand): New predicates.
* config/sh/sh.c (sh_option_override): Check atomic options.
* config/sh/sh.h (TARGET_ANY_ATOMIC, UNSUPPORTED_ATOMIC_OPTIONS,
UNSUPPORTED_HARD_ATOMIC_CPU): New macros.
(DRIVER_SELF_SPECS): Use UNSUPPORTED_ATOMIC_OPTIONS and
UNSUPPORTED_HARD_ATOMIC_CPU.
* config/sh/sync.md: Update description comments.
(I12): New mode iterator.
(fetchop_predicate, fetchop_constraint): New code attributes.
(atomic_compare_and_swapsi_hard, 
atomic_compare_and_swapmode_hard, atomic_exchangesi_hard,
atomic_exchangemode_hard, atomic_fetch_fetchop_namesi_hard,
atomic_fetch_fetchop_namemode_hard, 
atomic_fetch_nandsi_hard, atomic_fetch_nandmode_hard,
atomic_fetchop_name_fetchsi_hard,
atomic_fetchop_name_fetchmode_hard,
atomic_nand_fetchsi_hard, atomic_nand_fetchmode_hard,
atomic_test_and_set_hard): New insns.
(atomic_compare_and_swapmode_soft, atomic_exchangemode_soft,
atomic_fetch_fetchop_namemode_soft,
atomic_fetch_nandmode_soft,
atomic_fetchop_name_fetchmode_soft,
atomic_nand_fetchmode_soft, atomic_test_and_set_soft): Use 
same formatting for the first line of the asm block as in new
insns above.
(atomic_compare_and_swapmode, atomic_exchangemode,
atomic_fetch_fetchop_namemode, 
atomic_fetchop_name_fetchmode, atomic_test_and_set):
Integrate new *_hard insns into expanders.
* config/sh/sh.opt (mhard-atomic): New option.
* doc/invoke.texi (SH Options): Document it.



Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi	(revision 187297)
+++ gcc/doc/invoke.texi	(working copy)
@@ -884,7 +884,7 @@
 -mprefergot  -musermode -multcost=@var{number} -mdiv=@var{strategy} @gol
 -mdivsi3_libfunc=@var{name} -mfixed-range=@var{register-range} @gol
 -mindexed-addressing -mgettrcost=@var{number} -mpt-fixed @gol
--maccumulate-outgoing-args -minvalid-symbols -msoft-atomic @gol
+-maccumulate-outgoing-args -minvalid-symbols -msoft-atomic -mhard-atomic @gol
 -mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mpretend-cmove @gol
 -menable-tas}
 
@@ -18053,10 +18053,22 @@
 Generate GNU/Linux compatible gUSA software atomic sequences for the atomic
 built-in functions.  The generated atomic sequences require support from the 
 interrupt / exception handling code of the system and are only suitable for
-single-core systems.  They will not perform correctly on multi-core systems.
+single-core systems.  They will not operate correctly on multi-core systems.
 This option is enabled by default when the target is @code{sh-*-linux*}.
+When the target is SH4A, this option will also partially utilize the hardware
+atomic instructions @code{movli.l} and @code{movco.l} to create more
+efficient code.
 For details on the atomic built-in functions see @ref{__atomic Builtins}.
 
+@item -mhard-atomic
+@opindex hard-atomic
+Generate hardware atomic sequences for the atomic built-in functions.  This
+is only available on SH4A and is suitable for multi-core systems.  Code
+compiled with this option will also be compatible with gUSA aware
+interrupt / exception handling systems.  In contrast to the
+@option{-msoft-atomic} option this will only use the instructions
+@code{movli.l} and @code{movco.l} to create atomic sequences.
+
 @item -menable-tas
 @opindex menable-tas
 Generate the @code{tas.b} opcode for @code{__atomic_test_and_set}.
Index: gcc/config/sh/predicates.md
===
--- gcc/config/sh/predicates.md	(revision 187297)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -879,3 +879,22 @@
 }
   return 0;
 })
+
+;; The atomic_* operand predicates are used for the atomic patterns.
+;; Depending on the particular pattern some operands can be immediate
+;; values.  Using these predicates avoids the usage of 'force_reg' in the
+;; expanders.
+(define_predicate atomic_arith_operand
+  (ior (match_code 

Re: [PATCH preprocessor, diagnostics] PR preprocessor/53229 - Fix diagnostics location when pasting tokens

2012-05-28 Thread Jason Merrill

On 05/24/2012 03:18 PM, Dodji Seketeli wrote:

Like the below?


Yep, thanks.  The patch is OK.

Jason



Re: [driver, LTO Patch]: Resurrect user specs support

2012-05-28 Thread Joseph S. Myers
On Mon, 28 May 2012, Christian Bruel wrote:

 
 
 On 05/28/2012 01:11 PM, Joseph S. Myers wrote:
  On Mon, 28 May 2012, Christian Bruel wrote:
  
  I shared the same concern, however, after playing bits with spec toys, I
  couldn't find a way to get a % switch recognition failure, since the
  switches passed on the command line at this point are already validated
  if necessary.
  
  Suppose with the existing sources an option (in a .opt file) is matched by 
  a $ spec, and not by any other spec.  Will it be rejected by the driver?  
  It shouldn't be. 
 
 indeed, it's not rejected if it is present in a .opt file. I was
 concerned that it will not be rejected even if not in any .opt (or now
 in --specs). Which was what the validated setting seemed to imply.
 
 Should it be rejected ? probably. But this is not implied by the --spec
 changes.

The existing rule is supposed to be: options are only accepted if in 
*both* a .opt file *and* a spec.  If not in a .opt file, the common 
machinery will reject them; if in a .opt file but not a spec, the driver's 
own validation machinery will reject them.

If the driver's own validation machinery isn't rejecting them, that 
indicates that some spec has handled them.  It's possible there's more 
than one piece of code relating to accepting such options and some such 
code is redundant.

(This can't be tested with options starting -f or -m because of the specs 
passing all such options to cc1.)

The new semantics are supposed to be, I think: an option in a .opt file is 
accepted if any spec matches it (same as now), an option not in a .opt 
file is only accepted if a user spec matches it and not simply because of 
a match from a built-in spec (where built-in specs are considered to 
include those generated by some of GCC's own runtime libraries).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [C++ Patch] PR 25137 (no -Wmissing-braces in -Wall version)

2012-05-28 Thread Jason Merrill

OK.

Jason


Port r184840 from gcc-4_6. (issue6259049)

2012-05-28 Thread Simon Baldwin
Port r184840 from gcc-4_6.

Forward-port r184840, contrib/testsuite-management/validate_failures.py fix
for cross-compilers, from gcc-4_6 to gcc-4_7.

Okay for google/integration and google/gcc-4_7-integration branches?


2012-05-28  Simon Baldwin  sim...@google.com

Port r184840 from gcc-4_6.

2012-03-02   Doug Kwan  dougk...@google.com

Backport r184357 from trunk

2012-02-17   Doug Kwan  dougk...@google.com

* contrib/testsuite-management/validate_failures.py
(GetMakefileValue): Check for cross compilers.


Index: contrib/testsuite-management/validate_failures.py
===
--- contrib/testsuite-management/validate_failures.py   (revision 187932)
+++ contrib/testsuite-management/validate_failures.py   (working copy)
@@ -146,7 +146,8 @@ def GetMakefileValue(makefile_name, valu
 def ValidBuildDirectory(builddir, target):
   if (not os.path.exists(builddir) or
   not os.path.exists('%s/Makefile' % builddir) or
-  not os.path.exists('%s/build-%s' % (builddir, target))):
+  (not os.path.exists('%s/build-%s' % (builddir, target)) and
+   not os.path.exists('%s/%s' % (builddir, target)))):
 return False
   return True
 

--
This patch is available for review at http://codereview.appspot.com/6259049


Re: Port r184840 from gcc-4_6. (issue6259049)

2012-05-28 Thread Simon Baldwin
On 28 May 2012 18:40, Simon Baldwin sim...@google.com wrote:

 Port r184840 from gcc-4_6.

 Forward-port r184840, contrib/testsuite-management/validate_failures.py fix
 for cross-compilers, from gcc-4_6 to gcc-4_7.

 Okay for google/integration and google/gcc-4_7-integration branches?

That should have read Okay for google/gcc-4_7?.  Turns out it's
already in the others.




 2012-05-28  Simon Baldwin  sim...@google.com

        Port r184840 from gcc-4_6.

        2012-03-02   Doug Kwan  dougk...@google.com

        Backport r184357 from trunk

                2012-02-17   Doug Kwan  dougk...@google.com

                * contrib/testsuite-management/validate_failures.py
                (GetMakefileValue): Check for cross compilers.


 Index: contrib/testsuite-management/validate_failures.py
 ===
 --- contrib/testsuite-management/validate_failures.py   (revision 187932)
 +++ contrib/testsuite-management/validate_failures.py   (working copy)
 @@ -146,7 +146,8 @@ def GetMakefileValue(makefile_name, valu
  def ValidBuildDirectory(builddir, target):
   if (not os.path.exists(builddir) or
       not os.path.exists('%s/Makefile' % builddir) or
 -      not os.path.exists('%s/build-%s' % (builddir, target))):
 +      (not os.path.exists('%s/build-%s' % (builddir, target)) and
 +       not os.path.exists('%s/%s' % (builddir, target)))):
     return False
   return True


 --
 This patch is available for review at http://codereview.appspot.com/6259049




--
Google UK Limited | Registered Office: Belgrave House, 76 Buckingham
Palace Road, London SW1W 9TQ | Registered in England Number: 3977902


Re: [AARCH64] [PATCH 3/3] AArch64 Port

2012-05-28 Thread Richard Sandiford
Tejas Belagod tbela...@arm.com writes:
 Hi Richard,

 Thanks for your comments. Some questions inline below.

 Richard Sandiford wrote:
 Marcus Shawcroft marcus.shawcr...@arm.com writes:
 This patch adds an implementation of integer iterators.
 
 Nice.  A few comments from an onlooker (on top of what Stephen said).
 
 +/* Since GCC does not construct a table of valid constants,
 +   we have to accept any int as valid.  No cross-checking can
 +   be done.  */
 +static int
 +find_int (const char *name)
 +{
 +  char *endptr;
 +  int ret;
 +
 +  if (ISDIGIT (*name))
 +{
 +  ret = strtol (name, &endptr, 0);
 +  gcc_assert (*endptr == '\0');
 
 I think this should be an error rather than an assert.
 
 +/* Stand-alone int iterator usage-checking function.  */
 +static bool
 +uses_int_iterator_p (rtx x, struct mapping *iterator, int opno)
 +{
 +  int i;
 +  for (i=0; i < num_int_iterator_data; i++)
 +if (int_iterator_data[i].iterator->group == iterator->group &&
 +   int_iterator_data[i].iterator->index == iterator->index)
 
 Formatting: && should be at the beginning of the second line.
 
 +  {
 +   /* Found an existing entry. Check if X is in its list.  */
 +   struct int_iterator_mapping it = int_iterator_data[i];
 +   int j;
 +
 +   for (j=0; j < it.num_rtx; j++)
 +   {
 + if (it.rtxs[j].x == x && it.rtxs[j].opno == opno)
 +   return true;
 +   }
 
 Formatting: redundant { ... }.
 
 It might be easier to store a pointer to XEXP (x, opno) than storing
 x and opno separately.
 
 +  }
 +  return false;
 +}
 +
  /* Map a code or mode attribute string P to the underlying string for
 ITERATOR and VALUE.  */
  
 @@ -341,7 +414,9 @@
x = rtx_alloc (bellwether_code);
memcpy (x, original, RTX_CODE_SIZE (bellwether_code));
  
 -  /* Change the mode or code itself.  */
 +  /* Change the mode or code itself.
 + For int iterators, apply_iterator () does nothing. This is
 + because we want to apply int iterators to operands below.  */
 
 The way I imagined this working is that we'd just walk a list of
 rtx * pointers for the current iterator and substitute the current
 iterator value.  Then we'd take a deep copy of the rtx once all
 iterators had been applied.  Checking every operand against the
 substitution table seems a bit round-about.
 

 I understand how this would work for mode and code iterators, but I'm a 
 bit confused about how it would for int iterators.

Probably because of a typo, sorry.  I meant int * in the above.
At least, they'd be int * for int iterators and rtx for mode
and code iterators.

 Don't we have to 
 traverse each operand to figure out which ones to substitute for an int 
 iterator value? Also, when you say take a deep copy after all the 
 iterators have been applied, do you mean code, mode and int iterators or 
 do you mean values of a particular iterator? As I understand the current 
 implementation, mode and code iterators use placeholder integral 
 constants that are replaced with actual iterator values during the rtx 
 traverse. If we take a deep copy after the replacement, won't we lose 
 these placeholder codes?

If you don't convert codes and modes (and leave it to me), then you'd
probably need to apply all int iterators first.  I expect it'd be easier
to convert modes and codes at the same time.

Richard


Re: [PATCH, tree-optimization] Fix for PR 52868

2012-05-28 Thread Igor Zamyatin
Ping?

On Sat, May 12, 2012 at 1:26 AM, Igor Zamyatin izamya...@gmail.com wrote:
 Ping?

 On Fri, Apr 27, 2012 at 4:42 PM, Igor Zamyatin izamya...@gmail.com wrote:
 On Wed, Apr 25, 2012 at 6:41 PM, Richard Guenther
 richard.guent...@gmail.com wrote:
 On Wed, Apr 25, 2012 at 4:32 PM, Igor Zamyatin izamya...@gmail.com wrote:
 On Wed, Apr 25, 2012 at 1:14 PM, Richard Guenther
 richard.guent...@gmail.com wrote:
 On Wed, Apr 25, 2012 at 10:56 AM, Igor Zamyatin izamya...@gmail.com 
 wrote:
 Hi all!

 I'd like to post for review the patch which makes some costs adjusting
 in get_computation_cost_at routine in ivopts part.
 As mentioned in the PR changes also fix the bwaves regression from PR 
 52272.
 Also changes introduce no degradations on spec2000/2006 and
 EEMBC1.1/2.0(this was measured on Atom) on x86


 Bootstrapped and regtested on x86. Ok to commit?

 I can't make sense of the patch and the comment does not help.

 +      diff_cost = cost.cost;
        cost.cost /= avg_loop_niter (data->current_loop);
 +      add_cost_val = add_cost (TYPE_MODE (ctype), data->speed);
 +      /* Do cost correction if address cost is small enough
 +         and difference cost is high enough.  */
 +      if (address_p && diff_cost > add_cost_val
 +          && get_address_cost (symbol_present, var_present,
 +                               offset, ratio, cstepi,
 +                               TYPE_MODE (TREE_TYPE (utype)),
 +                               TYPE_ADDR_SPACE (TREE_TYPE (utype)),
 +                               speed, stmt_is_after_inc,
 +                               can_autoinc).cost <= add_cost_val)
 +        cost.cost += add_cost_val;

 Please explain more thoroughly.  It also would seem to be better to add
 an extra case, as later code does

 For example for such code

    for (j=0; j<M;j++) {
        for (i=0; i<N; i++)
            sum += ptr->a[j][i] * ptr->c[k][i];
   }
  we currently have following gimple on x86 target (I provided a piece
 of all phase output):

           # ivtmp.13_30 = PHI ivtmp.13_31(3), ivtmp.13_33(7)
           D.1748_34 = (void *) ivtmp.13_30;
           D.1722_7 = MEM[base: D.1748_34, offset: 0B];
           D.1750_36 = ivtmp.27_28;
           D.1751_37 = D.1750_36 + ivtmp.13_30; -- we got
 non-invariant add which is not taken into account currently in cost
 model
           D.1752_38 = (void *) D.1751_37;
           D.1753_39 = (sizetype) k_8(D);
           D.1754_40 = D.1753_39 * 800;
           D.1723_9 = MEM[base: D.1752_38, index: D.1754_40, offset: 
 16000B];
           ...

  With proposed fix we produce:

           # ivtmp.14_30 = PHI ivtmp.14_31(3), 0(7)
           D.1749_34 = (struct S *) ivtmp.25_28;
           D.1722_7 = MEM[base: D.1749_34, index: ivtmp.14_30, offset: 0B];
           D.1750_35 = (sizetype) k_8(D);
           D.1751_36 = D.1750_35 * 800;
           D.1752_37 = ptr_6(D) + D.1751_36;
           D.1723_9 = MEM[base: D.1752_37, index: ivtmp.14_30, offset: 
 16000B];

 which is more effective on platforms where address cost is cheaper
 than cost of addition operation. That's basically what this adjustment
 is for.

 If we generally miss to account for the add then why is the adjustment
 conditional on diff_cost > add_cost and address_cost <= add_cost?

 Is this a new heuristic or a fix for not accurately computing the cost for 
 the
 stmts we generate?

 I'd say this is closer to heuristic since diff_cost > add_cost is an
 attempt to catch the case with non-invariant add produced by pointer
 difference and address_cost <= add_cost leaves the cases with cheap
 address operations


 Richard.

 So comment in the source code now looks as follows

 /* Do cost correction when address difference produces
   additional non-invariant add operation which is less
   profitable if address cost is cheaper than cost of add.  */


  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE 
 (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

 thus refactoring the code a bit would make it possible to CSE the
 get_address_cost
 call and eventually make it clearer what the code does.

 'offset' could be changed between two calls of get_address_cost so
 such refactoring looks useless.

 New patch (only the comment was changed) attached. Changelog was
 changed as well.


 Richard.


 Changelog:

  2012-04-26  Yuri Rumyantsev  yuri.rumyant...@intel.com

         * tree-ssa-loop-ivopts.c (get_computation_cost_at): Adjust
        cost model when address difference 

Re: [PATCH] Atom: Scheduler improvements for better imul placement

2012-05-28 Thread Igor Zamyatin
Ping?

On Sun, May 6, 2012 at 11:27 AM, Igor Zamyatin izamya...@gmail.com wrote:
 Ping. Could x86 maintainer(s) look at these changes?

 Thanks,
 Igor

 On Fri, Apr 20, 2012 at 4:04 PM, Igor Zamyatin izamya...@gmail.com wrote:
 On Tue, Apr 17, 2012 at 12:27 AM, Igor Zamyatin izamya...@gmail.com wrote:
 On Fri, Apr 13, 2012 at 4:20 PM, Andrey Belevantsev a...@ispras.ru wrote:
 On 13.04.2012 14:18, Igor Zamyatin wrote:

 On Thu, Apr 12, 2012 at 5:01 PM, Andrey Belevantseva...@ispras.ru
  wrote:

 On 12.04.2012 16:38, Richard Guenther wrote:


 On Thu, Apr 12, 2012 at 2:36 PM, Igor Zamyatinizamya...@gmail.com
  wrote:


 On Thu, Apr 12, 2012 at 4:24 PM, Richard Guenther
 richard.guent...@gmail.com    wrote:


 On Thu, Apr 12, 2012 at 2:00 PM, Alexander Monakovamona...@ispras.ru
  wrote:



 Can atom execute two IMUL in parallel?  Or what exactly is the
 pipeline
 behavior?



 As I understand from Intel's optimization reference manual, the
 behavior is as
 follows: if the instruction immediately following IMUL has shorter
 latency,
 execution is stalled for 4 cycles (which is IMUL's latency);
 otherwise,
 a
 4-or-more cycles latency instruction can be issued after IMUL without
 a
 stall.
 In other words, IMUL is pipelined with respect to other long-latency
 instructions, but not to short-latency instructions.



 It seems to be modeled in the pipeline description though:

 ;;; imul insn has 5 cycles latency
 (define_reservation atom-imul-32
                    atom-imul-1, atom-imul-2, atom-imul-3,
 atom-imul-4,
                     atom-port-0)

 ;;; imul instruction excludes other non-FP instructions.
 (exclusion_set atom-eu-0, atom-eu-1
               atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4)


 The main idea is quite simple:

 If we are going to schedule IMUL instruction (it is on the top of
 ready list) we try to find out producer of other (independent) IMUL
 instruction that is in ready list too. The goal is try to schedule
 such a producer to get another IMUL in ready list and get scheduling
 of 2 successive IMUL instructions.



 Why does that not happen without your patch?  Does it never happen
 without
 your patch or does it merely not happen for one EEMBC benchmark (can
 you provide a testcase?)?



 It does not happen because the scheduler by itself does not do such
 specific
 reordering.  That said, it is easy to imagine the cases where this patch
 will make things worse rather than better.

 Igor, why not try different subtler mechanisms like adjust_priority,
 which
 is get called when an insn is added to the ready list?  E.g. increase the
 producer's priority.

 The patch as is misses checks for NONDEBUG_INSN_P.  Also, why bail out
 when
 you have more than one imul in the ready list?  Don't you want to bump
 the
 priority of the other imul found?


 Could you provide some examples when this patch would harm the
 performance?


 I thought of the cases when the other ready insns can fill up the hole and
 that would be more beneficial because e.g. they would be on more critical
 paths than the producer of your second imul.  I don't know enough of Atom 
 to
 give an example -- maybe some long divisions?



 Sched_reorder was chosen since it is used in other ports and looks
 most suitable for such case, e.g. it provides access to the whole
 ready list.
 BTW, just increasing producer's priority seems to be more risky in
 performance sense - we can incorrectly start delaying some
 instructions.


 Yes, but exactly because of the above example you can start incorrectly
 delaying other insns, too, as you force the insn to be the first in the
 list.  While bumping priority still leaves the scheduler sorting heuristics
 in place and actually lowers that risk.


 Thought ready list doesn't contain DEBUG_INSN... Is it so? If it
 contains them - this could be added easily


 It does, but I'm not sure the sched_reorder hook gets them or they are
 immediately removed -- I saw similar checks in one of the targets' hooks.

 Done with DEBUG_INSN, also 1-imul limit was removed. Patch attached


 Anyways, my main thought was that it is better to test on more benchmarks 
 to
 alleviate the above concerns, so as long as the i386 maintainers are happy,
 I don't see major problems here.  A good idea could be to generalize the
 patch to handle other long latency insns as second consumers, not only
 imuls, if this is relevant for Atom.

 Yes, generalization of this approach is in plans. According to Atom
 Software optimization guide there are several headrooms left here.
 As for trying on more benchmarks - the particular case is indeed quite
 rare. I attached the example
 where patch helps to group imuls in pairs which is profitable for
 Atom. Such and similar codes are not very common.
 But hopefully this approach could help avoid this and other glassjaws.

 BTW, this patch also helps some EEMBC tests when funroll-loops specified.
 So, any feedback from i386 maintainers about this? :)

 Changelog slightly 

Re: [cxx-conversion] New Hash Table (issue6244048)

2012-05-28 Thread Lawrence Crowl
On 5/28/12, Jakub Jelinek ja...@redhat.com wrote:
 On Fri, May 25, 2012 at 02:42:39PM -0700, Lawrence Crowl wrote:
  On 5/24/12, Jakub Jelinek ja...@redhat.com wrote:
   On Thu, May 24, 2012 at 09:43:42AM -0700, Lawrence Crowl wrote:
Add a type-safe hash table, typed_htab.  Uses of this table
replace uses of libiberty's htab_t.  The benefits include
less boiler-plate code, full type safety, and improved
performance.
  
   You haven't looked at the most important problem of that
   approach - code bloat.
 
  Are you claiming that the size of the binary is more important
  than run-time performance, type safety, and source code size?

 Runtime performance goes in hand with the size of the binary,
 at least size of frequently used code.

Well, yes and no.  We need to worry about total size, memory resident
size, and cache resident size.  The patch clearly increases total
size, but I doubt that is much of a factor because most systems
lazily load pages from the image.  I don't think we have enough
information to assess the memory resident size, and I don't think it
matters because large compilations are large because the data space
is much larger than the code space.  The patch reduces the cache
resident size because the dynamic path of instructions is shorter.

 By converting just a couple of hash tables you can't really
 measure it, you'd need to convert a significant number of them,
 then you can see what effect it has on runtime performance.

Well, I have a performance improvement with eight of them, of which
one isn't exercised in the bootstrap.

 As said earlier, GCC has lots of hash tables, and many of them
 are used in performance critical code, by increasing the I-cache
 footprint of that performance critical code there is risk of
 reducing performance.  The common C++ programming techniques often
 lead to significant code bloat which really shouldn't be ignored.

But the patch potentially reduces the I-cache footprint.  The new
implementation eliminates pointer tests, it turns indirect
function calls into direct function calls, it enables inlining
those functions, etc.  If the compiler is suboptimal in dealing
with that, we should fix the compiler.  (I think the compiler is
behaving reasonably.)

If changing a table does not deliver performance, we can choose
not to convert that table.  We do not need to convert all of them.

-- 
Lawrence Crowl


[patch] disintegrate integrate.[ch]

2012-05-28 Thread Steven Bosscher
Hello,

The old RTL inliner, integrate.c, now only contains a few functions
for initial-value pairs, and a few functions related to inlining.

The attached patch moves the code from integrate.c to (what I hope you
agree to be) better places:

* inliner code goes to tree-inline.c
* functions only called from dwarf2out.c are moved there.
* allocate_initial_values is moved to ira.c
* the initial-value stuff is moved to function.c

The rest is just mechanical updates: Don't include integrate.h
anywhere, and include function.h if something is needed from there.

The files integrate.c and integrate.h can be removed after this change.

Bootstrapped and regtested on powerpc-unknown-linux-gnu. OK?

Ciao!
Steven


disintegrate_integrate.diff
Description: Binary data


Re: [PATCH] Atom: Scheduler improvements for better imul placement

2012-05-28 Thread Uros Bizjak
Hello!

 Ping?

Please at least add a URL to the patch; it took me some time to
find the latest version [1], and I'm not even sure if it is the latest
version...

I assume that you cleared all issues with middle-end and scheduler
maintainers, it is not clear from the message.

+   (1) IMUL instrction is on the top of list;

Typo above.

+  static int issue_rate = -1;
+  int n_ready = *pn_ready;
+  rtx insn;
+  rtx insn1;
+  rtx insn2;

Please put three definitions above on the same line.

+  int i;
+  sd_iterator_def sd_it;
+  dep_t dep;
+  int index = -1;
+
+  /* set up issue rate */
+  if (issue_rate < 0)
+issue_rate = ix86_issue_rate();

Please set issue_rate unconditionally here.  Also, please follow the
GNU style of comments (Full sentence with two spaces after the dot)
everywhere, e.g:

/* Set up issue rate.  */

+  if (!(GET_CODE (SET_SRC (insn)) == MULT
+   && GET_MODE (SET_SRC (insn)) == SImode))
+return issue_rate;

Is it correct that only SImode multiplies are checked against SImode
multiplies? Can't we use DImode or HImode multiply (or other
long-latency insns) to put them into the shadow of the first multiply
insn?

As proposed in [2], there are many other fine-tuning approaches
proposed by the scheduler maintainer. OTOH, even the big hammer
approach in the proposed patch makes things better, so it is the step
in the right direction - and it is existing practice anyway.

Under this rationale, I think that the patch should be committed to
mainline. But please also consider proposed fine-tunings to refine the
scheduling accuracy.

So, OK for mainline, if there are no objections from other maintainers
in next two days.

[1] http://gcc.gnu.org/ml/gcc-patches/2012-04/msg00964.html
[2] http://gcc.gnu.org/ml/gcc-patches/2012-04/msg00806.html

Thanks,
Uros.


Re: [PATCH 2/2] arm: add iwMMXt mmx-2.c test

2012-05-28 Thread Matt Turner
On Thu, Apr 5, 2012 at 4:53 AM, Ramana Radhakrishnan
ramana.radhakrish...@linaro.org wrote:
 On 4 April 2012 19:35, Matt Turner matts...@gmail.com wrote:
  gcc/testsuite/gcc.target/arm/mmx-2.c |  158 
 ++
  1 files changed, 158 insertions(+), 0 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/arm/mmx-2.c

 diff --git a/gcc/testsuite/gcc.target/arm/mmx-2.c 
 b/gcc/testsuite/gcc.target/arm/mmx-2.c
 new file mode 100644
 index 000..603a63b
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/arm/mmx-2.c
 @@ -0,0 +1,158 @@
 +/* { dg-do compile } */
 +/* { dg-skip-if Test is specific to the iWMMXt { arm*-*-* } { -mcpu=* } 
 { -mcpu=iwmmxt } } */
 +/* { dg-skip-if Test is specific to the iWMMXt { arm*-*-* } { -mabi=* } 
 { -mabi=iwmmxt } } */
 +/* { dg-skip-if Test is specific to the iWMMXt { arm*-*-* } { -march=* 
 } { -march=iwmmxt } } */
 +/* { dg-skip-if Test is specific to ARM mode { arm*-*-* } { -mthumb } { 
  } } */

 How about simplifying this with a dg-require-effective-target
 arm_arm_ok instead of doing
 dg-require-effective-target arm32 and then skipping it for Thumb2 ?

I might not understand properly, but couldn't I just do this?

/* { dg-require-effective-target arm_iwmmxt_ok } */

Thanks,
Matt


Re: [PATCH 1/2] mips: Add R4600 scheduling support for imul and idiv

2012-05-28 Thread Matt Turner
On Sat, Feb 25, 2012 at 3:11 AM, Richard Sandiford
rdsandif...@googlemail.com wrote:
 Matt Turner matts...@gmail.com writes:
 The r4600_imul and r4600_idiv reservations were correct for si, but
 there were no *_di reservations.

 See page 4 of
 http://www.sgistuff.net/hardware/other/documents/R4600_Prod_OV.pdf

 2012-02-24  Matt Turner  matts...@gmail.com

       * config/mips/4600.md (r4600_imul_si): Rename from r4600_imul.
       (r4600_imul_di): New.
       (r4600_idiv_si): Rename from r4600_idiv.
       (r4600_idiv_di): New.

 Both patches look good, thanks.  Will commit once 4.8 is open and the
 copyright assignment is sorted.

 Richard

Copyright assignment is sorted. Please commit. :)


Re: [PATCH] alpha: add bypasses for fmul/fadd/fcmov - fst/ftoi

2012-05-28 Thread Matt Turner
On Fri, Feb 24, 2012 at 10:53 PM, Matt Turner matts...@gmail.com wrote:
 See section 2.5.3 (page 28) of
 http://download.majix.org/dec/comp_guide_v2.pdf

 2012-02-24  Matt Turner  matts...@gmail.com

        * config/alpha/ev6.md: (define_bypass ev6_fmul,ev6_fadd): New.
        (define_bypass ev6_fcmov): New.
 ---
  gcc/config/alpha/ev6.md |    4 
  1 files changed, 4 insertions(+), 0 deletions(-)

 diff --git a/gcc/config/alpha/ev6.md b/gcc/config/alpha/ev6.md
 index adfe504..a16535a 100644
 --- a/gcc/config/alpha/ev6.md
 +++ b/gcc/config/alpha/ev6.md
 @@ -147,11 +147,15 @@
        (eq_attr type fadd,fcpys,fbr))
   ev6_fa)

 +(define_bypass 6 ev6_fmul,ev6_fadd ev6_fst,ev6_ftoi)
 +
  (define_insn_reservation ev6_fcmov 8
   (and (eq_attr tune ev6)
        (eq_attr type fcmov))
   ev6_fa,nothing*3,ev6_fa)

 +(define_bypass 10 ev6_fcmov ev6_fst,ev6_ftoi)
 +
  (define_insn_reservation ev6_fdivsf 12
   (and (eq_attr tune ev6)
        (and (eq_attr type fdiv)
 --
 1.7.3.4


Copyright assignment is sorted. Please commit. :)


Re: [PATCH] arm: add _mm_empty to mmintrin.h for source compatibility

2012-05-28 Thread Matt Turner
On Tue, Feb 28, 2012 at 7:13 PM, Ramana Radhakrishnan
ramana.radhakrish...@linaro.org wrote:
 On Fri, Feb 24, 2012 at 10:53:35PM -0500, Matt Turner wrote:
 The x86/amd64 mmintrin.h provides the _mm_empty intrinsic for the 'emms'
 MMX instruction. Although ARM does not need such an instruction, we
 should provide an empty _mm_empty function nonetheless for source
 compatibility.

 OK for 4.8 and after your copyright assignment has been
 sorted.

 Ramana


 2012-02-24  Matt Turner  matts...@gmail.com

       * config/arm/mmintrin.h (_mm_empty): New.
 ---
  gcc/config/arm/mmintrin.h |    7 +++
  1 files changed, 7 insertions(+), 0 deletions(-)

 diff --git a/gcc/config/arm/mmintrin.h b/gcc/config/arm/mmintrin.h
 index 2cc500d..ea73bf1 100644
 --- a/gcc/config/arm/mmintrin.h
 +++ b/gcc/config/arm/mmintrin.h
 @@ -32,6 +32,12 @@ typedef int __v2si __attribute__ ((vector_size (8)));
  typedef short __v4hi __attribute__ ((vector_size (8)));
  typedef char __v8qi __attribute__ ((vector_size (8)));

 +/* Provided for source compatibility with MMX.  */
 +extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
 __artificial__))
 +_mm_empty (void)
 +{
 +}
 +
  /* Convert __m64 and __int64 into each other.  */
  static __inline __m64
  _mm_cvtsi64_m64 (__int64 __i)
 @@ -1248,6 +1254,7 @@ _m_from_int (int __a)
  #define _m_psadzbw _mm_sadz_pu8
  #define _m_psadzwd _mm_sadz_pu16
  #define _m_paligniq _mm_align_si64
 +#define _m_empty _mm_empty
  #define _m_cvt_si2pi _mm_cvtsi64_m64
  #define _m_cvt_pi2si _mm_cvtm64_si64

 --
 1.7.3.4


Copyright assignment is sorted. Please commit. :)


Re: [PATCH] arm: Fix iwmmxt shift and logical intrinsics (PR 35294).

2012-05-28 Thread Matt Turner
On Fri, Feb 24, 2012 at 10:53 PM, Matt Turner matts...@gmail.com wrote:
 PR 36798 and 36966 are duplicates.

 2012-02-24  Matt Turner  matts...@gmail.com

        PR target/35294
        * config/arm/arm.c (arm_expand_builtin): Wire up missing
        intrinsics.
 ---
  gcc/config/arm/arm.c |   62 
 +-
  1 files changed, 61 insertions(+), 1 deletions(-)

Drop this patch. Marvell has a five patch series that fixes this and
more. Maybe this patch would be suitable for the 4.6 and 4.7 branches,
since Marvell's adds some features?


Re: [PATCH 1/2] doc: Correct __builtin_arm_tinsr prototype documentation

2012-05-28 Thread Matt Turner
On Wed, Apr 4, 2012 at 2:34 PM, Matt Turner matts...@gmail.com wrote:
 2012-04-04  Matt Turner  matts...@gmail.com

        gcc/
        * doc/extend.texi (__builtin_arm_tinsrb): Add missing second
        parameter.
        (__builtin_arm_tinsrh): Likewise.
        (__builtin_arm_tinsrw): Likewise.
 ---
 This patch and 2/2 are tie-ons to
 http://gcc.gnu.org/ml/gcc-patches/2012-02/msg01269.html

 Still waiting on copyright assignment, but I think this doc patch
 is trivial enough to be committed without it.

  gcc/doc/extend.texi |    6 +++---
  1 files changed, 3 insertions(+), 3 deletions(-)

 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index bb43825..966175d 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
 @@ -8676,9 +8676,9 @@ int __builtin_arm_textrmsw (v2si, int)
  int __builtin_arm_textrmub (v8qi, int)
  int __builtin_arm_textrmuh (v4hi, int)
  int __builtin_arm_textrmuw (v2si, int)
 -v8qi __builtin_arm_tinsrb (v8qi, int)
 -v4hi __builtin_arm_tinsrh (v4hi, int)
 -v2si __builtin_arm_tinsrw (v2si, int)
 +v8qi __builtin_arm_tinsrb (v8qi, int, int)
 +v4hi __builtin_arm_tinsrh (v4hi, int, int)
 +v2si __builtin_arm_tinsrw (v2si, int, int)
  long long __builtin_arm_tmia (long long, int, int)
  long long __builtin_arm_tmiabb (long long, int, int)
  long long __builtin_arm_tmiabt (long long, int, int)
 --
 1.7.3.4


Copyright assignment is sorted. Please commit. :)


[SH] PR 51340 - Enable -mfused-madd with -funsafe-math-optimizations

2012-05-28 Thread Oleg Endo
Hello,

The attached patch addresses PR 51340.
Tested with 

make info dvi pdf

and

make check-gcc RUNTESTFLAGS=sh.exp=pr51340* --target_board=sh-sim
\{-m4a-single/-ml,-m2/-ml,-m2a/-mb,-m2e/-ml,-m4a/-ml,-m4a-single/-ml,-m4a-single-only/-ml,-m4a-nofpu/-ml}

OK?

Cheers,
Oleg

ChangeLog:

PR target/51340
* config/sh/sh.c (sh_option_override):  Set TARGET_FMAC if  
flag_unsafe_math_optimizations is set.
* doc/invoke.texi (SH Options): Add -mno-fused-madd description.
Update description of -mfused-madd.

testsuite/ChangeLog:

PR target/51340
* gcc/target/sh/pr51340-1.c: New.
* gcc/target/sh/pr51340-2.c: New.
* gcc/target/sh/pr51340-3.c: New.
Index: gcc/testsuite/gcc.target/sh/pr51340-1.c
===
--- gcc/testsuite/gcc.target/sh/pr51340-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51340-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* Check that the fmac insn is generated when -funsafe-math-optimizations
+   is specified.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 -funsafe-math-optimizations } */
+/* { dg-skip-if  { sh*-*-* } { -m1 -m2 -m4al *nofpu -m4-340* -m4-400* -m4-500* -m5* } {  } }  */
+/* { dg-final { scan-assembler fmac } } */
+
+float
+test_func (float a, float b, float c, float d, float e, float f)
+{
+  return a * b + c * d + e * f;
+}
+
Index: gcc/testsuite/gcc.target/sh/pr51340-2.c
===
--- gcc/testsuite/gcc.target/sh/pr51340-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51340-2.c	(revision 0)
@@ -0,0 +1,12 @@
+/* Check that the fmac insn is not generated when -mno-fused-madd is specified.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 -funsafe-math-optimizations -mno-fused-madd } */
+/* { dg-skip-if  { sh*-*-* } { -m1 -m2 -m4al *nofpu -m4-340* -m4-400* -m4-500* -m5* } {  } }  */
+/* { dg-final { scan-assembler-not fmac } } */
+
+float
+test_func (float a, float b, float c, float d, float e, float f)
+{
+  return a * b + c * d + e * f;
+}
+
Index: gcc/testsuite/gcc.target/sh/pr51340-3.c
===
--- gcc/testsuite/gcc.target/sh/pr51340-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr51340-3.c	(revision 0)
@@ -0,0 +1,12 @@
+/* Check that the fmac insn is generated when -mfused-madd is specified.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 -mfused-madd } */
+/* { dg-skip-if  { sh*-*-* } { -m1 -m2 -m4al *nofpu -m4-340* -m4-400* -m4-500* -m5* } {  } }  */
+/* { dg-final { scan-assembler fmac } } */
+
+float
+test_func (float a, float b, float c, float d, float e, float f)
+{
+  return a * b + c * d + e * f;
+}
+
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 187914)
+++ gcc/config/sh/sh.c	(working copy)
@@ -877,6 +877,13 @@
 	align_functions = min_align;
 }
 
+  /* Enable fmac insn for a * b + c SFmode calculations when -ffast-math
+ is enabled and -mno-fused-madd is not specified by the user.
+ The fmac insn can't be enabled by default due to the implied
+ FMA semantics.   See also PR target/29100.  */
+  if (global_options_set.x_TARGET_FMAC == 0 && flag_unsafe_math_optimizations)
+TARGET_FMAC = 1;
+
   if (sh_fixed_range_str)
 sh_fix_range (sh_fixed_range_str);
 
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi	(revision 187914)
+++ gcc/doc/invoke.texi	(working copy)
@@ -885,8 +885,8 @@
 -mdivsi3_libfunc=@var{name} -mfixed-range=@var{register-range} @gol
 -mindexed-addressing -mgettrcost=@var{number} -mpt-fixed @gol
 -maccumulate-outgoing-args -minvalid-symbols -msoft-atomic @gol
--mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mpretend-cmove @gol
--menable-tas}
+-mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mno-fused-madd @gol
+-mpretend-cmove -menable-tas}
 
 @emph{Solaris 2 Options}
 @gccoptlist{-mimpure-text  -mno-impure-text @gol
@@ -18237,11 +18237,17 @@
 is in effect.
 
 @item -mfused-madd
+@itemx -mno-fused-madd
 @opindex mfused-madd
-Allow the usage of the @code{fmac} instruction (floating-point
-multiply-accumulate) if the processor type supports it.  Enabling this
-option might generate code that produces different numeric floating-point
-results compared to strict IEEE 754 arithmetic.
+@opindex mno-fused-madd
+If the processor type supports it, setting @code{-mfused-madd} will allow the
+usage of the @code{fmac} instruction (floating-point multiply-accumulate) for
+regular calculations.  Enabling this option might generate faster code but also
+produce different numeric floating-point results compared to strict IEEE 754
+arithmetic.  @code{-mfused-madd} is enabled by default by option
+@option{-funsafe-math-optimizations}.  Setting @code{-mno-fused-madd} will

Committed: trap for CRIS.

2012-05-28 Thread Hans-Peter Nilsson
Implemented because the overhead of calling abort spills over
into the frame of the almost-never-calling function.

Tested cris-elf, committed.

gcc:

* config/cris/cris.h (TARGET_HAS_BREAK, TARGET_TRAP_USING_BREAK8):
New macros.
* config/cris/cris.md (trap): Define, enabled for
TARGET_TRAP_USING_BREAK8.
* config/cris/cris.opt (mtrap-using-break8): New option.

gcc/testsuite:

* gcc.target/cris/torture/trap-1.c,
gcc.target/cris/torture/trap-2.c,
gcc.target/cris/torture/trap-3.c,
gcc.target/cris/torture/trap-v0.c,
gcc.target/cris/torture/trap-v3.c: New tests.

diff --git gcc/config/cris/cris.h gcc/config/cris/cris.h
index 16b038d..c2475b5 100644
--- gcc/config/cris/cris.h
+++ gcc/config/cris/cris.h
@@ -302,9 +302,14 @@ extern int cris_cpu_version;
 
 #define TARGET_HAS_MUL_INSNS (cris_cpu_version >= CRIS_CPU_NG)
 #define TARGET_HAS_LZ (cris_cpu_version >= CRIS_CPU_ETRAX4)
+#define TARGET_HAS_BREAK (cris_cpu_version >= CRIS_CPU_ETRAX4)
 #define TARGET_HAS_SWAP (cris_cpu_version >= CRIS_CPU_SVINTO)
 #define TARGET_V32 (cris_cpu_version >= CRIS_CPU_V32)
 
+/* The break instruction was introduced with ETRAX 4.  */
+#define TARGET_TRAP_USING_BREAK8 \
+ (cris_trap_using_break8 == 2 ? TARGET_HAS_BREAK : cris_trap_using_break8)
+
 /* Node: Storage Layout */
 
 #define BITS_BIG_ENDIAN 0
diff --git gcc/config/cris/cris.md gcc/config/cris/cris.md
index 09ac8e5..0e4b62d 100644
--- gcc/config/cris/cris.md
+++ gcc/config/cris/cris.md
@@ -3944,6 +3944,14 @@
   
   nop
   [(set_attr cc none)])
+
+;; Same as the gdb trap breakpoint, will cause a SIGTRAP for
+;; cris-linux* and crisv32-linux*, as intended.  Will work in
+;; freestanding environments with sufficient framework.
+(define_insn trap
+  [(trap_if (const_int 1) (const_int 8))]
+  TARGET_TRAP_USING_BREAK8
+  break 8)
 
 ;; We need to stop accesses to the stack after the memory is
 ;; deallocated.  Unfortunately, reorg doesn't look at naked clobbers,
diff --git gcc/config/cris/cris.opt gcc/config/cris/cris.opt
index 07cf646..5c0136e 100644
--- gcc/config/cris/cris.opt
+++ gcc/config/cris/cris.opt
@@ -175,6 +175,10 @@ Target Report RejectNegative Joined 
Var(cris_max_stackframe_str)
 max-stackframe=
 Target Report RejectNegative Joined Undocumented Var(cris_max_stackframe_str)
 
+mtrap-using-break8
+Target Report Var(cris_trap_using_break8) Init(2)
+Emit traps as \"break 8\", default for CRIS v3 and up.  If disabled, calls to 
abort() are used.
+
 ; TARGET_SVINTO: Currently this just affects alignment.  FIXME:
 ; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here?
 ; This and the others below could just as well be variables and

diff --git gcc/testsuite/gcc.target/cris/torture/trap-1.c 
gcc/testsuite/gcc.target/cris/torture/trap-1.c
new file mode 100644
index 000..48363fb
--- /dev/null
+++ gcc/testsuite/gcc.target/cris/torture/trap-1.c
@@ -0,0 +1,13 @@
+/* Check that break 8 defaults according to CPU version.  */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -march* } {  } } */
+/* { dg-final { scan-assembler break 8 { target { ! cris-*-elf } } } } */
+/* { dg-final { scan-assembler-not bsr { target { ! cris-*-elf } } } } */
+/* { dg-final { scan-assembler-not jsr { target { ! cris-*-elf } } } } */
+/* { dg-final { scan-assembler-not break\[ \t\] { target cris-*-elf } } } */
+/* { dg-final { scan-assembler \[jb\]sr \[_\]\?abort { target cris-*-elf } } 
} */
+
+void do_trap (void)
+{
+  __builtin_trap ();
+}
diff --git gcc/testsuite/gcc.target/cris/torture/trap-2.c 
gcc/testsuite/gcc.target/cris/torture/trap-2.c
new file mode 100644
index 000..155d5fe
--- /dev/null
+++ gcc/testsuite/gcc.target/cris/torture/trap-2.c
@@ -0,0 +1,11 @@
+/* As trap-1.c but forcing on.  */
+/* { dg-do compile } */
+/* { dg-options -mtrap-using-break8 } */
+/* { dg-final { scan-assembler break 8 } } */
+/* { dg-final { scan-assembler-not bsr } } */
+/* { dg-final { scan-assembler-not jsr } } */
+
+void do_trap (void)
+{
+  __builtin_trap ();
+}
diff --git gcc/testsuite/gcc.target/cris/torture/trap-3.c 
gcc/testsuite/gcc.target/cris/torture/trap-3.c
new file mode 100644
index 000..dfa0924
--- /dev/null
+++ gcc/testsuite/gcc.target/cris/torture/trap-3.c
@@ -0,0 +1,10 @@
+/* Like trap-1.c and trap-2.c but force calls to abort.  */
+/* { dg-do compile } */
+/* { dg-options -mno-trap-using-break8 } */
+/* { dg-final { scan-assembler-not break\[ \t\] } } */
+/* { dg-final { scan-assembler \[jb\]sr \[_\]\?abort } } */
+
+void do_trap (void)
+{
+  __builtin_trap ();
+}
diff --git gcc/testsuite/gcc.target/cris/torture/trap-v0.c 
gcc/testsuite/gcc.target/cris/torture/trap-v0.c
new file mode 100644
index 000..084fb28
--- /dev/null
+++ gcc/testsuite/gcc.target/cris/torture/trap-v0.c
@@ -0,0 +1,11 @@
+/* As trap-1.c but with CPU version specified, excluding.  */
+/* { dg-do compile } */
+/* { dg-skip-if  { *-*-* } { -march=* } {  } } */
+/* { dg-options -march=v0 } */
+/* { dg-final { 

[PATCH ARM iWMMXt 0/5] Improve iWMMXt support

2012-05-28 Thread Matt Turner

This series was written by Marvell and sent by Xinyu Qi x...@marvell.com
a number of times in the last year.

We (One Laptop per Child) need these patches for reasonable iWMMXt support
and performance. Without them, logical and shift intrinsics cause ICEs,
see PR 35294 and its duplicates 36798 and 36966.

The software compositing library pixman uses MMX intrinsics to optimize
various compositing routines. The following are the minimum execution times
of cairo-perf-trace graphics work loads without and with iWMMXt-optimized
pixman for the image and image16 backends (32-bpp and 16-bpp respectively).

                              image               image16
           evolution   33.492 ->  29.590    30.334 ->  24.751
firefox-planet-gnome  191.465 -> 173.835   211.297 -> 187.570
gnome-system-monitor   51.956 ->  44.549    52.272 ->  40.525
  gnome-terminal-vim   53.625 ->  54.554    47.593 ->  47.341
      grads-heat-map    4.439 ->   4.165     4.548 ->   4.624
       midori-zoomed   38.033 ->  28.500    38.576 ->  26.937
             poppler   41.096 ->  31.949    41.230 ->  31.749
  swfdec-giant-steps   20.062 ->  16.912    28.294 ->  17.286
      swfdec-youtube   42.281 ->  37.335    52.848 ->  47.053
   xfce4-terminal-a1   64.311 ->  51.011    62.592 ->  51.191

We have cleaned up some white-space issues with the patches and fixed a
small bug in patch 4/5 since the last time they were posted in December
(added tandc,textrc,torc,torvsc to the wtype attribute)

Please commit them for 4.8.

For 4.7 and 4.6 please consider committing my patch
[PATCH] arm: Fix iwmmxt shift and logical intrinsics (PR 35294).
which only fixes the logical and shift intrinsics.

Thanks,

Matt Turner


[PATCH ARM iWMMXt 5/5] pipeline description

2012-05-28 Thread Matt Turner
From: Xinyu Qi x...@marvell.com

gcc/
* config/arm/t-arm (MD_INCLUDES): Add marvell-f-iwmmxt.md.
* config/arm/marvell-f-iwmmxt.md: New file.
* config/arm/arm.md (marvell-f-iwmmxt.md): Include.
---
 gcc/config/arm/arm.md  |1 +
 gcc/config/arm/marvell-f-iwmmxt.md |  179 
 gcc/config/arm/t-arm   |1 +
 3 files changed, 181 insertions(+), 0 deletions(-)
 create mode 100644 gcc/config/arm/marvell-f-iwmmxt.md

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b0333c2..baa3b7c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -546,6 +546,7 @@
  (const_string yes)
  (const_string no
 
+(include marvell-f-iwmmxt.md)
 (include arm-generic.md)
 (include arm926ejs.md)
 (include arm1020e.md)
diff --git a/gcc/config/arm/marvell-f-iwmmxt.md 
b/gcc/config/arm/marvell-f-iwmmxt.md
new file mode 100644
index 000..fe8e455
--- /dev/null
+++ b/gcc/config/arm/marvell-f-iwmmxt.md
@@ -0,0 +1,179 @@
+;; Marvell WMMX2 pipeline description
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;; Written by Marvell, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; http://www.gnu.org/licenses/.
+
+
+(define_automaton marvell_f_iwmmxt)
+
+
+;; Pipelines
+
+
+;; This is a 7-stage pipeline:
+;;
+;;MD | MI | ME1 | ME2 | ME3 | ME4 | MW
+;;
+;; There are various bypasses modelled to a greater or lesser extent.
+;;
+;; Latencies in this file correspond to the number of cycles after
+;; the issue stage that it takes for the result of the instruction to
+;; be computed, or for its side-effects to occur.
+
+(define_cpu_unit mf_iwmmxt_MD marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_MI marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_ME1 marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_ME2 marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_ME3 marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_ME4 marvell_f_iwmmxt)
+(define_cpu_unit mf_iwmmxt_MW marvell_f_iwmmxt)
+
+(define_reservation mf_iwmmxt_ME
+  mf_iwmmxt_ME1,mf_iwmmxt_ME2,mf_iwmmxt_ME3,mf_iwmmxt_ME4
+)
+
+(define_reservation mf_iwmmxt_pipeline
+  mf_iwmmxt_MD, mf_iwmmxt_MI, mf_iwmmxt_ME, mf_iwmmxt_MW
+)
+
+;; An attribute to indicate whether our reservations are applicable.
+(define_attr marvell_f_iwmmxt yes,no
+  (const (if_then_else (symbol_ref arm_arch_iwmmxt)
+   (const_string yes) (const_string no
+
+
+;; instruction classes
+
+
+;; An attribute appended to instructions for classification
+
+(define_attr wmmxt_shift yes,no
+  (if_then_else (eq_attr wtype wror, wsll, wsra, wsrl)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_pack yes,no
+  (if_then_else (eq_attr wtype waligni, walignr, wmerge, wpack, wshufh, 
wunpckeh, wunpckih, wunpckel, wunpckil)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_mult_c1 yes,no
+  (if_then_else (eq_attr wtype wmac, wmadd, wmiaxy, wmiawxy, wmulw, 
wqmiaxy, wqmulwm)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_mult_c2 yes,no
+  (if_then_else (eq_attr wtype wmul, wqmulm)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_alu_c1 yes,no
+  (if_then_else (eq_attr wtype wabs, wabsdiff, wand, wandn, wmov, wor, 
wxor)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_alu_c2 yes,no
+  (if_then_else (eq_attr wtype wacc, wadd, waddsubhx, wavg2, wavg4, wcmpeq, 
wcmpgt, wmax, wmin, wsub, waddbhus, wsubaddhx)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_alu_c3 yes,no
+  (if_then_else (eq_attr wtype wsad)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_transfer_c1 yes,no
+  (if_then_else (eq_attr wtype tbcst, tinsr, tmcr, tmcrr)
+(const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_transfer_c2 yes,no
+  (if_then_else (eq_attr wtype textrm, tmovmsk, tmrc, tmrrc)
+   (const_string yes) (const_string no))
+)
+
+(define_attr wmmxt_transfer_c3 yes,no
+  (if_then_else (eq_attr wtype 

[PATCH ARM iWMMXt 1/5] ARM code generic change

2012-05-28 Thread Matt Turner
From: Xinyu Qi x...@marvell.com

gcc/
* config/arm/arm.c (FL_IWMMXT2): New define.
(arm_arch_iwmmxt2): New variable.
(arm_option_override): Enable use of iWMMXt with VFP.
Disable use of iWMMXt with NEON. Disable use of iWMMXt under
Thumb mode. Set arm_arch_iwmmxt2.
(arm_expand_binop_builtin): Accept VOIDmode op.
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define __IWMMXT2__.
(TARGET_IWMMXT2): New define.
(TARGET_REALLY_IWMMXT2): Likewise.
(arm_arch_iwmmxt2): Declare.
* config/arm/arm-cores.def (iwmmxt2): Add FL_IWMMXT2.
* config/arm/arm-arches.def (iwmmxt2): Likewise.
* config/arm/arm.md (arch): Add iwmmxt2.
(arch_enabled): Handle iwmmxt2.
---
 gcc/config/arm/arm-arches.def |2 +-
 gcc/config/arm/arm-cores.def  |2 +-
 gcc/config/arm/arm.c  |   25 +
 gcc/config/arm/arm.h  |7 +++
 gcc/config/arm/arm.md |6 +-
 5 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index 3123426..f4dd6cc 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -57,4 +57,4 @@ ARM_ARCH(armv7-m, cortexm3, 7M,  FL_CO_PROC | 
FL_FOR_ARCH7M)
 ARM_ARCH(armv7e-m, cortexm4,  7EM, FL_CO_PROC |FL_FOR_ARCH7EM)
 ARM_ARCH(ep9312,  ep9312, 4T,  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4)
 ARM_ARCH(iwmmxt,  iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | 
FL_XSCALE | FL_IWMMXT)
-ARM_ARCH(iwmmxt2, iwmmxt2,5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | 
FL_XSCALE | FL_IWMMXT)
+ARM_ARCH(iwmmxt2, iwmmxt2,5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | 
FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index d82b10b..c82eada 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -105,7 +105,7 @@ ARM_CORE(arm1020e,  arm1020e, 5TE,
 FL_LDSCHED, fastmul)
 ARM_CORE(arm1022e,  arm1022e,5TE, 
FL_LDSCHED, fastmul)
 ARM_CORE(xscale,xscale,  5TE, 
FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
 ARM_CORE(iwmmxt,iwmmxt,  5TE, 
FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
-ARM_CORE(iwmmxt2,   iwmmxt2, 5TE, 
FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
+ARM_CORE(iwmmxt2,   iwmmxt2, 5TE, 
FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale)
 ARM_CORE(fa606te,   fa606te,  5TE, 
FL_LDSCHED, 9e)
 ARM_CORE(fa626te,   fa626te,  5TE, 
FL_LDSCHED, 9e)
 ARM_CORE(fmp626,fmp626,   5TE, 
FL_LDSCHED, 9e)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7a98197..b0680ab 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -685,6 +685,7 @@ static int thumb_call_reg_needed;
 #define FL_ARM_DIV (1 << 23)  /* Hardware divide (ARM mode).  */
 
 #define FL_IWMMXT (1 << 29)  /* XScale v2 or Intel Wireless 
MMX technology.  */
+#define FL_IWMMXT2 (1 << 30)   /* Intel Wireless MMX2 technology.  */
 
 /* Flags that only effect tuning, not available instructions.  */
 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
@@ -766,6 +767,9 @@ int arm_arch_cirrus = 0;
 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
 int arm_arch_iwmmxt = 0;
 
+/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
+int arm_arch_iwmmxt2 = 0;
+
 /* Nonzero if this chip is an XScale.  */
 int arm_arch_xscale = 0;
 
@@ -1717,6 +1721,7 @@ arm_option_override (void)
   arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
   arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
   arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
+  arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
   arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
   arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
@@ -1817,14 +1822,17 @@ arm_option_override (void)
 }
 
   /* FPA and iWMMXt are incompatible because the insn encodings overlap.
- VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
- will ever exist.  GCC makes no attempt to support this combination.  */
-  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
-    sorry ("iWMMXt and hardware floating point");
+ VFP and iWMMXt however can coexist.  */
+  if (TARGET_IWMMXT && TARGET_HARD_FLOAT && !TARGET_VFP)
+    error ("iWMMXt and non-VFP floating point unit are incompatible");
+
+  /* iWMMXt and NEON are incompatible.  */
+  if (TARGET_IWMMXT && TARGET_NEON)
+    error ("iWMMXt and NEON are incompatible");
 
-  /* ??? iWMMXt insn patterns 

[PATCH ARM iWMMXt 2/5] intrinsic head file change

2012-05-28 Thread Matt Turner
From: Xinyu Qi x...@marvell.com

gcc/
* config/arm/mmintrin.h: Use __IWMMXT__ to enable iWMMXt intrinsics.
Use __IWMMXT2__ to enable iWMMXt2 intrinsics.
Use C name-mangling for intrinsics.
(__v8qi): Redefine.
(_mm_cvtsi32_si64, _mm_andnot_si64, _mm_sad_pu8): Revise.
(_mm_sad_pu16, _mm_align_si64, _mm_setwcx, _mm_getwcx): Likewise.
(_m_from_int): Likewise.
(_mm_sada_pu8, _mm_sada_pu16): New intrinsic.
(_mm_alignr0_si64, _mm_alignr1_si64, _mm_alignr2_si64): Likewise.
(_mm_alignr3_si64, _mm_tandcb, _mm_tandch, _mm_tandcw): Likewise.
(_mm_textrcb, _mm_textrch, _mm_textrcw, _mm_torcb): Likewise.
(_mm_torch, _mm_torcw, _mm_tbcst_pi8, _mm_tbcst_pi16): Likewise.
(_mm_tbcst_pi32): Likewise.
(_mm_abs_pi8, _mm_abs_pi16, _mm_abs_pi32): New iWMMXt2 intrinsic.
(_mm_addsubhx_pi16, _mm_absdiff_pu8, _mm_absdiff_pu16): Likewise.
(_mm_absdiff_pu32, _mm_addc_pu16, _mm_addc_pu32): Likewise.
(_mm_avg4_pu8, _mm_avg4r_pu8, _mm_maddx_pi16, _mm_maddx_pu16): Likewise.
(_mm_msub_pi16, _mm_msub_pu16, _mm_mulhi_pi32): Likewise.
(_mm_mulhi_pu32, _mm_mulhir_pi16, _mm_mulhir_pi32): Likewise.
(_mm_mulhir_pu16, _mm_mulhir_pu32, _mm_mullo_pi32): Likewise.
(_mm_qmulm_pi16, _mm_qmulm_pi32, _mm_qmulmr_pi16): Likewise.
(_mm_qmulmr_pi32, _mm_subaddhx_pi16, _mm_addbhusl_pu8): Likewise.
(_mm_addbhusm_pu8, _mm_qmiabb_pi32, _mm_qmiabbn_pi32): Likewise.
(_mm_qmiabt_pi32, _mm_qmiabtn_pi32, _mm_qmiatb_pi32): Likewise.
(_mm_qmiatbn_pi32, _mm_qmiatt_pi32, _mm_qmiattn_pi32): Likewise.
(_mm_wmiabb_si64, _mm_wmiabbn_si64, _mm_wmiabt_si64): Likewise.
(_mm_wmiabtn_si64, _mm_wmiatb_si64, _mm_wmiatbn_si64): Likewise.
(_mm_wmiatt_si64, _mm_wmiattn_si64, _mm_wmiawbb_si64): Likewise.
(_mm_wmiawbbn_si64, _mm_wmiawbt_si64, _mm_wmiawbtn_si64): Likewise.
(_mm_wmiawtb_si64, _mm_wmiawtbn_si64, _mm_wmiawtt_si64): Likewise.
(_mm_wmiawttn_si64, _mm_merge_si64): Likewise.
(_mm_torvscb, _mm_torvsch, _mm_torvscw): Likewise.
(_m_to_int): New define.
---
 gcc/config/arm/mmintrin.h |  649 ++---
 1 files changed, 614 insertions(+), 35 deletions(-)

diff --git a/gcc/config/arm/mmintrin.h b/gcc/config/arm/mmintrin.h
index 2cc500d..0fe551d 100644
--- a/gcc/config/arm/mmintrin.h
+++ b/gcc/config/arm/mmintrin.h
@@ -24,16 +24,30 @@
 #ifndef _MMINTRIN_H_INCLUDED
 #define _MMINTRIN_H_INCLUDED
 
+#ifndef __IWMMXT__
+#error You must enable WMMX/WMMX2 instructions (e.g. -march=iwmmxt or 
-march=iwmmxt2) to use iWMMXt/iWMMXt2 intrinsics
+#else
+
+#ifndef __IWMMXT2__
+#warning You only enable iWMMXt intrinsics. Extended iWMMXt2 intrinsics 
available only if WMMX2 instructions enabled (e.g. -march=iwmmxt2)
+#endif
+
+
+#if defined __cplusplus
+extern "C" { /* Begin C */
+/* Intrinsics use C name-mangling.  */
+#endif /* __cplusplus */
+
 /* The data type intended for user use.  */
 typedef unsigned long long __m64, __int64;
 
 /* Internal data types for implementing the intrinsics.  */
 typedef int __v2si __attribute__ ((vector_size (8)));
 typedef short __v4hi __attribute__ ((vector_size (8)));
-typedef char __v8qi __attribute__ ((vector_size (8)));
+typedef signed char __v8qi __attribute__ ((vector_size (8)));
 
 /* Convert __m64 and __int64 into each other.  */
-static __inline __m64 
+static __inline __m64
 _mm_cvtsi64_m64 (__int64 __i)
 {
   return __i;
@@ -54,7 +68,7 @@ _mm_cvtsi64_si32 (__int64 __i)
 static __inline __int64
 _mm_cvtsi32_si64 (int __i)
 {
-  return __i;
+  return (__i & 0xffffffff);
 }
 
 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
@@ -603,7 +617,7 @@ _mm_and_si64 (__m64 __m1, __m64 __m2)
 static __inline __m64
 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
 {
-  return __builtin_arm_wandn (__m1, __m2);
+  return __builtin_arm_wandn (__m2, __m1);
 }
 
 /* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
@@ -935,7 +949,13 @@ _mm_avg2_pu16 (__m64 __A, __m64 __B)
 static __inline __m64
 _mm_sad_pu8 (__m64 __A, __m64 __B)
 {
-  return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B);
+  return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B);
+}
+
+static __inline __m64
+_mm_sada_pu8 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return (__m64) __builtin_arm_wsadb ((__v2si)__A, (__v8qi)__B, (__v8qi)__C);
 }
 
 /* Compute the sum of the absolute differences of the unsigned 16-bit
@@ -944,9 +964,16 @@ _mm_sad_pu8 (__m64 __A, __m64 __B)
 static __inline __m64
 _mm_sad_pu16 (__m64 __A, __m64 __B)
 {
-  return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B);
+  return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B);
 }
 
+static __inline __m64
+_mm_sada_pu16 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return (__m64) __builtin_arm_wsadh ((__v2si)__A, (__v4hi)__B, (__v4hi)__C);
+}
+
+
 /* Compute the sum of the absolute 

[PATCH ARM iWMMXt 3/5] built in define and expand

2012-05-28 Thread Matt Turner
From: Xinyu Qi x...@marvell.com

gcc/
* config/arm/arm.c (enum arm_builtins): Revise built-in fcode.
(IWMMXT2_BUILTIN): New define.
(IWMMXT2_BUILTIN2): Likewise.
(iwmmx2_mbuiltin): Likewise.
(builtin_description bdesc_2arg): Revise built in declaration.
(builtin_description bdesc_1arg): Likewise.
(arm_init_iwmmxt_builtins): Revise built in initialization.
(arm_expand_builtin): Revise built in expansion.
---
 gcc/config/arm/arm.c |  620 +-
 1 files changed, 559 insertions(+), 61 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index b0680ab..51eed40 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19637,8 +19637,15 @@ static neon_builtin_datum neon_builtin_data[] =
FIXME?  */
 enum arm_builtins
 {
-  ARM_BUILTIN_GETWCX,
-  ARM_BUILTIN_SETWCX,
+  ARM_BUILTIN_GETWCGR0,
+  ARM_BUILTIN_GETWCGR1,
+  ARM_BUILTIN_GETWCGR2,
+  ARM_BUILTIN_GETWCGR3,
+
+  ARM_BUILTIN_SETWCGR0,
+  ARM_BUILTIN_SETWCGR1,
+  ARM_BUILTIN_SETWCGR2,
+  ARM_BUILTIN_SETWCGR3,
 
   ARM_BUILTIN_WZERO,
 
@@ -19661,7 +19668,11 @@ enum arm_builtins
   ARM_BUILTIN_WSADH,
   ARM_BUILTIN_WSADHZ,
 
-  ARM_BUILTIN_WALIGN,
+  ARM_BUILTIN_WALIGNI,
+  ARM_BUILTIN_WALIGNR0,
+  ARM_BUILTIN_WALIGNR1,
+  ARM_BUILTIN_WALIGNR2,
+  ARM_BUILTIN_WALIGNR3,
 
   ARM_BUILTIN_TMIA,
   ARM_BUILTIN_TMIAPH,
@@ -19797,6 +19808,81 @@ enum arm_builtins
   ARM_BUILTIN_WUNPCKELUH,
   ARM_BUILTIN_WUNPCKELUW,
 
+  ARM_BUILTIN_WABSB,
+  ARM_BUILTIN_WABSH,
+  ARM_BUILTIN_WABSW,
+
+  ARM_BUILTIN_WADDSUBHX,
+  ARM_BUILTIN_WSUBADDHX,
+
+  ARM_BUILTIN_WABSDIFFB,
+  ARM_BUILTIN_WABSDIFFH,
+  ARM_BUILTIN_WABSDIFFW,
+
+  ARM_BUILTIN_WADDCH,
+  ARM_BUILTIN_WADDCW,
+
+  ARM_BUILTIN_WAVG4,
+  ARM_BUILTIN_WAVG4R,
+
+  ARM_BUILTIN_WMADDSX,
+  ARM_BUILTIN_WMADDUX,
+
+  ARM_BUILTIN_WMADDSN,
+  ARM_BUILTIN_WMADDUN,
+
+  ARM_BUILTIN_WMULWSM,
+  ARM_BUILTIN_WMULWUM,
+
+  ARM_BUILTIN_WMULWSMR,
+  ARM_BUILTIN_WMULWUMR,
+
+  ARM_BUILTIN_WMULWL,
+
+  ARM_BUILTIN_WMULSMR,
+  ARM_BUILTIN_WMULUMR,
+
+  ARM_BUILTIN_WQMULM,
+  ARM_BUILTIN_WQMULMR,
+
+  ARM_BUILTIN_WQMULWM,
+  ARM_BUILTIN_WQMULWMR,
+
+  ARM_BUILTIN_WADDBHUSM,
+  ARM_BUILTIN_WADDBHUSL,
+
+  ARM_BUILTIN_WQMIABB,
+  ARM_BUILTIN_WQMIABT,
+  ARM_BUILTIN_WQMIATB,
+  ARM_BUILTIN_WQMIATT,
+
+  ARM_BUILTIN_WQMIABBN,
+  ARM_BUILTIN_WQMIABTN,
+  ARM_BUILTIN_WQMIATBN,
+  ARM_BUILTIN_WQMIATTN,
+
+  ARM_BUILTIN_WMIABB,
+  ARM_BUILTIN_WMIABT,
+  ARM_BUILTIN_WMIATB,
+  ARM_BUILTIN_WMIATT,
+
+  ARM_BUILTIN_WMIABBN,
+  ARM_BUILTIN_WMIABTN,
+  ARM_BUILTIN_WMIATBN,
+  ARM_BUILTIN_WMIATTN,
+
+  ARM_BUILTIN_WMIAWBB,
+  ARM_BUILTIN_WMIAWBT,
+  ARM_BUILTIN_WMIAWTB,
+  ARM_BUILTIN_WMIAWTT,
+
+  ARM_BUILTIN_WMIAWBBN,
+  ARM_BUILTIN_WMIAWBTN,
+  ARM_BUILTIN_WMIAWTBN,
+  ARM_BUILTIN_WMIAWTTN,
+
+  ARM_BUILTIN_WMERGE,
+
   ARM_BUILTIN_THREAD_POINTER,
 
   ARM_BUILTIN_NEON_BASE,
@@ -20329,6 +20415,10 @@ static const struct builtin_description bdesc_2arg[] =
   { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
 
+#define IWMMXT2_BUILTIN(code, string, builtin) \
+  { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
+ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
   IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
@@ -20385,44 +20475,45 @@ static const struct builtin_description bdesc_2arg[] =
   IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
   IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
   IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
-  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
-  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
+  IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
+  IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
+  IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
+  IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
+  IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
+  IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
+  IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
+  IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
+  IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
+  IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
+  IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
+  IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
+  IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
+  IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
+  IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
+  IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
+  IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
+  IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
+  IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
+  IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
+  IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
+  IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
 
 #define 

Re: [SH] PR 52941 - Add support for movco.l / movli.l atomics on SH4A

2012-05-28 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch is the same as the last proposed patch in the PR.
 Re-tested it on sh-sim against rev 187914 with -msoft-atomic and
 -mhard-atomic enabled, as well with 'make info dvi pdf'.
 
 I was hoping that after recent reload patches the R0 spill failure
 problem
 (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50751#c29 ) might go
 away, but it didn't.  The gcc.c-torture/compile/sync-1.c test still
 fails when -mhard-atomic is enabled.
 Actually this patch does not introduce the spill failure problem, the
 sync-1 test case just makes it obvious.  The actual cause comes from the
 patches of PR 50751 and (as far as I understand) some missing cases in
 reload, which should be addressed separately, I think.
 
 OK to apply?

OK.

Regards,
kaz


Re: [SH] PR 51340 - Enable -mfused-madd with -funsafe-math-optimizations

2012-05-28 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch addresses PR 51340.
 Tested with 
 
 make info dvi pdf
 
 and
 
 make check-gcc RUNTESTFLAGS=sh.exp=pr51340* --target_board=sh-sim
 \{-m4a-single/-ml,-m2/-ml,-m2a/-mb,-m2e/-ml,-m4a/-ml,-m4a-single/-ml,-m4a-single-only/-ml,-m4a-nofpu/-ml}
 
 OK?

OK.

Regards,
kaz