The vector compare intrinsics (vc[gl][et]z, vceqz, vtst) were written using
__builtin functions because (IIUC) at the time gcc vector extensions did not
support comparison ops in both the C and C++ frontends. The frontends have
since been updated.
Following the first patch, we now get equal/better code generation from using
gcc vector extensions (specifically, TST instructions are generated again, and
all NOTs are eliminated), so we can remove a bunch of code and builtins :).
Tested with check-gcc and check-g++ on aarch64-none-elf, and with aarch64.exp
and simd.exp on aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code
handling cmge, cmgt, cmeq, cmtst.
* config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
cmlt, cmgeu, cmgtu, cmtst): Remove.
* config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c3df73e..aa2c40c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1215,22 +1215,6 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
BUILTIN_VALLDI (UNOP, abs, 2)
return fold_build1 (ABS_EXPR, type, args[0]);
break;
- BUILTIN_VALLDI (BINOP, cmge, 0)
- return fold_build2 (GE_EXPR, type, args[0], args[1]);
- break;
- BUILTIN_VALLDI (BINOP, cmgt, 0)
- return fold_build2 (GT_EXPR, type, args[0], args[1]);
- break;
- BUILTIN_VALLDI (BINOP, cmeq, 0)
- return fold_build2 (EQ_EXPR, type, args[0], args[1]);
- break;
- BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
- {
- tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
- tree vec_zero_node = build_zero_cst (type);
- return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
- break;
- }
VAR1 (REINTERP_SS, reinterpretdi, 0, df)
VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ae52469..9320e99 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -240,17 +240,6 @@
BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
- /* Implemented by aarch64_cmcmpmode. */
- BUILTIN_VALLDI (BINOP, cmeq, 0)
- BUILTIN_VALLDI (BINOP, cmge, 0)
- BUILTIN_VALLDI (BINOP, cmgt, 0)
- BUILTIN_VALLDI (BINOP, cmle, 0)
- BUILTIN_VALLDI (BINOP, cmlt, 0)
- /* Implemented by aarch64_cmcmpmode. */
- BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
- BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
-
/* Implemented by reduc_surplus_mode. */
BUILTIN_VALL (UNOP, reduc_splus_, 10)
BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e7485f0..ea56b82 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -14632,7 +14632,7 @@ vcaltq_f64 (float64x2_t __a, float64x2_t __b)
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+ return (uint32x2_t) (__a == __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -14644,26 +14644,25 @@ vceq_f64 (float64x1_t __a, float64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
- (int8x8_t) __b);
+ return (uint8x8_t) (__a == __b);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
+ return (uint8x8_t) (__a == __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
+ return (uint16x4_t) (__a == __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
+ return (uint32x2_t) (__a == __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -14675,22 +14674,19 @@ vceq_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
- (int8x8_t) __b);
+ return