Re: [PATCH AArch64 2/2] Remove vector compare/tst __builtins

2014-09-02 Thread Marcus Shawcroft
On 19 August 2014 14:43, Alan Lawrence alan.lawre...@arm.com wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove
 code
 handling cmge, cmgt, cmeq, cmtst.

 * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
 cmlt, cmgeu, cmgtu, cmtst): Remove.

 * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
 vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
 vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
 vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.

OK /Marcus


[PATCH AArch64 2/2] Remove vector compare/tst __builtins

2014-08-19 Thread Alan Lawrence
The vector compare intrinsics (vc[gl][et]z, vceqz, vtst) were written using 
__builtin functions as (IIUC) at the time gcc vector extensions did not support 
comparison ops across both C and C++ frontends. These have since been updated.


Following the first patch, we now get equal/better code generation from using 
gcc vector extensions (specifically, TST instructions are generated again, and 
all NOTs are eliminated), so we can remove a bunch of code and builtins :).


Tested with check-gcc and check-g++ on aarch64-none-elf, aarch64.exp+simd.exp on 
aarch64_be-none-elf.


gcc/ChangeLog:

* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code
handling cmge, cmgt, cmeq, cmtst.

* config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
cmlt, cmgeu, cmgtu, cmtst): Remove.

* config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c3df73e..aa2c40c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1215,22 +1215,6 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
   BUILTIN_VALLDI (UNOP, abs, 2)
 	return fold_build1 (ABS_EXPR, type, args[0]);
 	break;
-  BUILTIN_VALLDI (BINOP, cmge, 0)
-	return fold_build2 (GE_EXPR, type, args[0], args[1]);
-	break;
-  BUILTIN_VALLDI (BINOP, cmgt, 0)
-	return fold_build2 (GT_EXPR, type, args[0], args[1]);
-	break;
-  BUILTIN_VALLDI (BINOP, cmeq, 0)
-	return fold_build2 (EQ_EXPR, type, args[0], args[1]);
-	break;
-  BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
-	{
-	  tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
-	  tree vec_zero_node = build_zero_cst (type);
-	  return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
-	  break;
-	}
   VAR1 (REINTERP_SS, reinterpretdi, 0, df)
   VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
   VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ae52469..9320e99 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -240,17 +240,6 @@
   BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
   BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
 
-  /* Implemented by aarch64_cm&lt;cmp&gt;&lt;mode&gt;.  */
-  BUILTIN_VALLDI (BINOP, cmeq, 0)
-  BUILTIN_VALLDI (BINOP, cmge, 0)
-  BUILTIN_VALLDI (BINOP, cmgt, 0)
-  BUILTIN_VALLDI (BINOP, cmle, 0)
-  BUILTIN_VALLDI (BINOP, cmlt, 0)
-  /* Implemented by aarch64_cm&lt;cmp&gt;&lt;mode&gt;.  */
-  BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
-  BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
-  BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
-
   /* Implemented by reduc_&lt;sur&gt;plus_&lt;mode&gt;.  */
   BUILTIN_VALL (UNOP, reduc_splus_, 10)
   BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e7485f0..ea56b82 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -14632,7 +14632,7 @@ vcaltq_f64 (float64x2_t __a, float64x2_t __b)
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceq_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+  return (uint32x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -14644,26 +14644,25 @@ vceq_f64 (float64x1_t __a, float64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-		 (int8x8_t) __b);
+  return (uint8x8_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
+  return (uint8x8_t) (__a == __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vceq_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
+  return (uint16x4_t) (__a == __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceq_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
+  return (uint32x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -14675,22 +14674,19 @@ vceq_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-		 (int8x8_t) __b);
+  return